{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 1读取数据\n",
    "# 2数据探索与预处理\n",
    "## 2.1标签的分布情况探索与可视化\n",
    "## 2.2缺失值的补全\n",
    "## 2.3数据分箱\n",
    "# 3计算WOE与IV值\n",
    "# 4根据计算结果再探索一下数据\n",
    "# 5WOE编码\n",
    "# 6开始用逻辑回归来进行建模\n",
    "## 6.1筛选特征\n",
    "## 6.2数据集切分\n",
    "## 6.3模型训练与评估"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import math\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "pd.set_option('display.width', 10000)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 1读取数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>SeriousDlqin2yrs</th>\n",
       "      <th>RevolvingUtilizationOfUnsecuredLines</th>\n",
       "      <th>age</th>\n",
       "      <th>NumberOfTime30-59DaysPastDueNotWorse</th>\n",
       "      <th>DebtRatio</th>\n",
       "      <th>MonthlyIncome</th>\n",
       "      <th>NumberOfOpenCreditLinesAndLoans</th>\n",
       "      <th>NumberOfTimes90DaysLate</th>\n",
       "      <th>NumberRealEstateLoansOrLines</th>\n",
       "      <th>NumberOfTime60-89DaysPastDueNotWorse</th>\n",
       "      <th>NumberOfDependents</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0.766127</td>\n",
       "      <td>45</td>\n",
       "      <td>2</td>\n",
       "      <td>0.802982</td>\n",
       "      <td>9120.0</td>\n",
       "      <td>13</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0.957151</td>\n",
       "      <td>40</td>\n",
       "      <td>0</td>\n",
       "      <td>0.121876</td>\n",
       "      <td>2600.0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0.658180</td>\n",
       "      <td>38</td>\n",
       "      <td>1</td>\n",
       "      <td>0.085113</td>\n",
       "      <td>3042.0</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0.233810</td>\n",
       "      <td>30</td>\n",
       "      <td>0</td>\n",
       "      <td>0.036050</td>\n",
       "      <td>3300.0</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>0.907239</td>\n",
       "      <td>49</td>\n",
       "      <td>1</td>\n",
       "      <td>0.024926</td>\n",
       "      <td>63588.0</td>\n",
       "      <td>7</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149995</th>\n",
       "      <td>149996</td>\n",
       "      <td>0</td>\n",
       "      <td>0.040674</td>\n",
       "      <td>74</td>\n",
       "      <td>0</td>\n",
       "      <td>0.225131</td>\n",
       "      <td>2100.0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149996</th>\n",
       "      <td>149997</td>\n",
       "      <td>0</td>\n",
       "      <td>0.299745</td>\n",
       "      <td>44</td>\n",
       "      <td>0</td>\n",
       "      <td>0.716562</td>\n",
       "      <td>5584.0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149997</th>\n",
       "      <td>149998</td>\n",
       "      <td>0</td>\n",
       "      <td>0.246044</td>\n",
       "      <td>58</td>\n",
       "      <td>0</td>\n",
       "      <td>3870.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>18</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149998</th>\n",
       "      <td>149999</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>30</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>5716.0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149999</th>\n",
       "      <td>150000</td>\n",
       "      <td>0</td>\n",
       "      <td>0.850283</td>\n",
       "      <td>64</td>\n",
       "      <td>0</td>\n",
       "      <td>0.249908</td>\n",
       "      <td>8158.0</td>\n",
       "      <td>8</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>150000 rows × 12 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        Unnamed: 0  SeriousDlqin2yrs  RevolvingUtilizationOfUnsecuredLines  age  NumberOfTime30-59DaysPastDueNotWorse    DebtRatio  MonthlyIncome  NumberOfOpenCreditLinesAndLoans  NumberOfTimes90DaysLate  NumberRealEstateLoansOrLines  NumberOfTime60-89DaysPastDueNotWorse  NumberOfDependents\n",
       "0                1                 1                              0.766127   45                                     2     0.802982         9120.0                               13                        0                             6                                     0                 2.0\n",
       "1                2                 0                              0.957151   40                                     0     0.121876         2600.0                                4                        0                             0                                     0                 1.0\n",
       "2                3                 0                              0.658180   38                                     1     0.085113         3042.0                                2                        1                             0                                     0                 0.0\n",
       "3                4                 0                              0.233810   30                                     0     0.036050         3300.0                                5                        0                             0                                     0                 0.0\n",
       "4                5                 0                              0.907239   49                                     1     0.024926        63588.0                                7                        0                             1                                     0                 0.0\n",
       "...            ...               ...                                   ...  ...                                   ...          ...            ...                              ...                      ...                           ...                                   ...                 ...\n",
       "149995      149996                 0                              0.040674   74                                     0     0.225131         2100.0                                4                        0                             1                                     0                 0.0\n",
       "149996      149997                 0                              0.299745   44                                     0     0.716562         5584.0                                4                        0                             1                                     0                 2.0\n",
       "149997      149998                 0                              0.246044   58                                     0  3870.000000            NaN                               18                        0                             1                                     0                 0.0\n",
       "149998      149999                 0                              0.000000   30                                     0     0.000000         5716.0                                4                        0                             0                                     0                 0.0\n",
       "149999      150000                 0                              0.850283   64                                     0     0.249908         8158.0                                8                        0                             2                                     0                 0.0\n",
       "\n",
       "[150000 rows x 12 columns]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
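    "# Load the raw training data\n",
    "df_train=pd.read_csv('cs-training.csv')\n",
    "df_train  # the first column ('Unnamed: 0') only holds a row id, so it can be used as the index when reading the file"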
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SeriousDlqin2yrs</th>\n",
       "      <th>RevolvingUtilizationOfUnsecuredLines</th>\n",
       "      <th>age</th>\n",
       "      <th>NumberOfTime30-59DaysPastDueNotWorse</th>\n",
       "      <th>DebtRatio</th>\n",
       "      <th>MonthlyIncome</th>\n",
       "      <th>NumberOfOpenCreditLinesAndLoans</th>\n",
       "      <th>NumberOfTimes90DaysLate</th>\n",
       "      <th>NumberRealEstateLoansOrLines</th>\n",
       "      <th>NumberOfTime60-89DaysPastDueNotWorse</th>\n",
       "      <th>NumberOfDependents</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>0.766127</td>\n",
       "      <td>45</td>\n",
       "      <td>2</td>\n",
       "      <td>0.802982</td>\n",
       "      <td>9120.0</td>\n",
       "      <td>13</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>0.957151</td>\n",
       "      <td>40</td>\n",
       "      <td>0</td>\n",
       "      <td>0.121876</td>\n",
       "      <td>2600.0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>0.658180</td>\n",
       "      <td>38</td>\n",
       "      <td>1</td>\n",
       "      <td>0.085113</td>\n",
       "      <td>3042.0</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>0.233810</td>\n",
       "      <td>30</td>\n",
       "      <td>0</td>\n",
       "      <td>0.036050</td>\n",
       "      <td>3300.0</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0</td>\n",
       "      <td>0.907239</td>\n",
       "      <td>49</td>\n",
       "      <td>1</td>\n",
       "      <td>0.024926</td>\n",
       "      <td>63588.0</td>\n",
       "      <td>7</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149996</th>\n",
       "      <td>0</td>\n",
       "      <td>0.040674</td>\n",
       "      <td>74</td>\n",
       "      <td>0</td>\n",
       "      <td>0.225131</td>\n",
       "      <td>2100.0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149997</th>\n",
       "      <td>0</td>\n",
       "      <td>0.299745</td>\n",
       "      <td>44</td>\n",
       "      <td>0</td>\n",
       "      <td>0.716562</td>\n",
       "      <td>5584.0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149998</th>\n",
       "      <td>0</td>\n",
       "      <td>0.246044</td>\n",
       "      <td>58</td>\n",
       "      <td>0</td>\n",
       "      <td>3870.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>18</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149999</th>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>30</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>5716.0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>150000</th>\n",
       "      <td>0</td>\n",
       "      <td>0.850283</td>\n",
       "      <td>64</td>\n",
       "      <td>0</td>\n",
       "      <td>0.249908</td>\n",
       "      <td>8158.0</td>\n",
       "      <td>8</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>150000 rows × 11 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        SeriousDlqin2yrs  RevolvingUtilizationOfUnsecuredLines  age  NumberOfTime30-59DaysPastDueNotWorse    DebtRatio  MonthlyIncome  NumberOfOpenCreditLinesAndLoans  NumberOfTimes90DaysLate  NumberRealEstateLoansOrLines  NumberOfTime60-89DaysPastDueNotWorse  NumberOfDependents\n",
       "1                      1                              0.766127   45                                     2     0.802982         9120.0                               13                        0                             6                                     0                 2.0\n",
       "2                      0                              0.957151   40                                     0     0.121876         2600.0                                4                        0                             0                                     0                 1.0\n",
       "3                      0                              0.658180   38                                     1     0.085113         3042.0                                2                        1                             0                                     0                 0.0\n",
       "4                      0                              0.233810   30                                     0     0.036050         3300.0                                5                        0                             0                                     0                 0.0\n",
       "5                      0                              0.907239   49                                     1     0.024926        63588.0                                7                        0                             1                                     0                 0.0\n",
       "...                  ...                                   ...  ...                                   ...          ...            ...                              ...                      ...                           ...                                   ...                 ...\n",
       "149996                 0                              0.040674   74                                     0     0.225131         2100.0                                4                        0                             1                                     0                 0.0\n",
       "149997                 0                              0.299745   44                                     0     0.716562         5584.0                                4                        0                             1                                     0                 2.0\n",
       "149998                 0                              0.246044   58                                     0  3870.000000            NaN                               18                        0                             1                                     0                 0.0\n",
       "149999                 0                              0.000000   30                                     0     0.000000         5716.0                                4                        0                             0                                     0                 0.0\n",
       "150000                 0                              0.850283   64                                     0     0.249908         8158.0                                8                        0                             2                                     0                 0.0\n",
       "\n",
       "[150000 rows x 11 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_train=pd.read_csv('cs-training.csv',index_col='Unnamed: 0')\n",
    "df_train\n",
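    "# Column reference:\n",
    "# SeriousDlqin2yrs: 1 = the borrower was 90 or more days past due (or worse), 0 = not\n",
    "# RevolvingUtilizationOfUnsecuredLines: total balance on credit cards and personal lines of credit (excluding real estate and installment debt such as car loans) divided by the credit limits\n",
    "# age: age of the borrower\n",
    "# NumberOfTime30-59DaysPastDueNotWorse: number of times the borrower was 30-59 days past due, but no worse, in the last two years\n",
    "# DebtRatio: monthly debt payments, alimony and living costs divided by total monthly income\n",
    "# MonthlyIncome: monthly income\n",
    "# NumberOfOpenCreditLinesAndLoans: number of open loans (e.g. car loan or mortgage) and lines of credit (e.g. credit cards)\n",
    "# NumberOfTimes90DaysLate: number of times the borrower was 90 days or more past due\n",
    "# NumberRealEstateLoansOrLines: number of mortgage and real estate loans, including home equity lines of credit\n",
    "# NumberOfTime60-89DaysPastDueNotWorse: number of times the borrower was 60-89 days past due, but no worse, in the last two years\n",
    "# NumberOfDependents: number of dependents in the family, excluding the borrower (spouse, children, etc.)"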
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 2数据探索与预处理"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2.1标签的分布情况探索与可视化"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    139974\n",
       "1     10026\n",
       "Name: SeriousDlqin2yrs, dtype: int64"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Inspect the distribution of the label SeriousDlqin2yrs\n",
    "df_train['SeriousDlqin2yrs'].value_counts()  # 139974 rows are 0 and 10026 rows are 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<AxesSubplot:xlabel='SeriousDlqin2yrs', ylabel='count'>"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY4AAAECCAYAAADpdjDfAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAAQw0lEQVR4nO3dfYxldX3H8fenPJjtLq2rjKtrWDfUrSkWV+LUwmaBgUItqbaR2GJaIUp1farGND5gIGqa+lAklkq1uBZhUUuybdQqRZQGkJUFzGyrEar2KQsWQl0o3elqq2b99o97tjvsAzu/uXPP7Oy8X8lmzv2ec+/5HjLcz/zO755zU1VIkjRTPzXfDUiSFhaDQ5LUxOCQJDUxOCRJTQwOSVKTo+e7gVE7/vjja/Xq1fPdhiQtKNu2bXukqsYOtO6ID47Vq1czOTk5321I0oKS5P6DrfNUlSSpicEhSWpicEiSmhgckqQmBockqYnBIUlqYnBIkpqMJDiSrEiyZZ/aLyb5crd8TJIbk2xNcvGwNUlSf+Y8OJIsBzYBS6fVAnwIOLYrvQmYrKp1wIuTHDdkTZLUk1FcOb4buAD4m2m1VwG3AS/qHk8Al3TLW4HxIWu3TW8gyQZgA8CqVauGPBx4wduuH/o1dOTZ9sGL5rsFaV7M+Yijqqaqaueex0meCrwCuGLaZkuBB7vlKWDFkLV9e9hYVeNVNT42dsBbrUiSZqmPyfEPAO+sqh9Pq+0ClnTLy7o+hqlJknrSx5vumcAfJ7kdeH6SPwK2Aeu79WuB7UPWJEk9Gfndcavq5/csJ7m9qi5L8izgpiSnAycB9zA4/TTbmiSpJyMbcVTVxMFqVXU/cC5wJ3BOVe0epjaqY5Ak7W/evo+jqh4CNs9VTZLUDyeWJUlNDA5JUhODQ5LUxOCQJDUxOCRJTQwOSVITg0OS1MTgkCQ1MTgkSU0MDklSE4NDktTE4JAkNTE4JElNDA5JUhODQ5LUxOCQJDUxOCRJTQwOSVITg0OS1MTgkCQ1GUlwJFmRZEu3vCrJ7UluTbIxA8ckuTHJ1iQXd9vNuiZJ6s+cB0eS5cAmYGlXei3w+qo6GzgBOBl4EzBZVeuAFyc5bsiaJKknoxhx7AYuAKYAqurSqvpWt+6pwCPABLC5q20FxoesPU6SDUkmk0zu2LFjbo5KkgSMIDiqaqqqdu5bT3IBcF9VPcRgNPJgt2oKWDFkbd8eNlbVeFWNj42NzclxSZIGepkcT3Ii8FbgLV1pF7CkW17W9TFMTZLUk5G/6XZzHjcAF08biWwD1nfLa4HtQ9YkST05uod9XAKsAq5KAvBuBpPnNyU5HTgJuIfB6afZ1iRJPRnZiKOqJrqf76iqZ1TVRPfvK1V1P3AucCdwTlXtHqY2qmOQJO2vjxHHAXWT5JvnqiZJ6ocTy5KkJgaHJKmJwSFJamJwSJKaGBySpCYGhySpicEhSWpicEiSmhgckqQmBockqYnBIUlqYnBIkpoYHJKkJgaHJKmJwSFJamJwSJKaGBySpCYGhySpicEhSWpicEiSmhgckqQmIwmOJCuSbOmWj0lyY5KtSS4eRU2S1J85D44ky4FNwNKu9CZgsqrWAS9OctwIapKknoxixLEbuACY6h5PAJu75a3A+Ahqj5NkQ5LJJJM7duwY+oAkSXvNeXBU1VRV7ZxWWgo82C1PAStGUNu3h41VNV5V42NjY3NxWJKkTh+T47uAJd3ysm6fc12TJPWkjzfdbcD6bnktsH0ENUlST47uYR+bgJuSnA6cBNzD4FTTXNYkST0Z2Yijqia6n/cD5wJ3AudU1e65ro3qGCRJ++tjxEFVPcTeT0KNpCZJ6ocTy5KkJgaHJKmJwSFJamJwSJKaGBySpCYGhySpicEhSWpicEiSmhgckqQmBockqYnBIUlqYnBIkpoYHJKkJgaHJKmJwSFJamJwSJKaGBySpCYGhySpicEhSWpicEiSmow8
OJIsT3JTki1Jru5q1yTZmuSyadvNuiZJ6k8fI44LgU9V1enAcUneDhxVVeuAlUnWJDl/trUe+pckTTOr4EiyvmHzR4HnJHkycAKwGtjcrbsVWA9MDFE7UH8bkkwmmdyxY0dDq5KkQ5lRcCS5ZZ/S+xv28VVgDfBm4NvAk4AHu3VTwApg6RC1/VTVxqoar6rxsbGxhlYlSYdy9BOtTPI84BTgmUku6spLgf9t2Mf7gNdV1VSSPwDeC3y8W7eMQXjtApbMsiZJ6tGh3nhzgJ+PAr/dsI+fBk5OchTwy8AH2HuKaS2wHdg2RE2S1KMnHHFU1TeAbyR5TlVdP8t9vB+4FngWcBfwJ8CWJCuB84BTgRqiJknq0RMGxzRXJnk5cOyewkyDpKq+Bjx3ei3JBHAucHlV7Ry2Jknqz0yD42bgL4E5+YhSVT3G3k9HDV2TJPVnpsExVVVXjLQTSdKCMNPg+GqSG4Drge8DVNUdI+tKknTYmmlw/JjBNRi/xOCTVQUYHJK0CM00OLYzCIs9oSFJWqRaLqALg4vvzgfOGE07kqTD3YxGHFW1adrDq5N8dET9SJIOczMKjiTTRxg/wz7XZUiSFo+ZznGcxd65jR8BbxhNO5Kkw91M5zjeB/wH8BTgEeA7I+tIknRYm2lwfAJ4GvBF4JkM7j0lSVqEZnqq6oSqurBb/lKSr4yqIUnS4W2mwfFQkncC9wCnsffLlCRJi8xMT1W9jkHIvIzBN++9dmQdSZIOazMNjk8BD1TVG4DjGMx5SJIWoZkGx/I9FwFW1fuA40fXkiTpcDbTOY5/T/IO4GsMbnT4vdG1JEk6nM10xPFK4AcM5jj+B7hoVA1Jkg5vM71X1Q+Bq0bciyRpAWi5O64kSQaHJKmNwSFJatJbcCT5aJKXdMvXJNma5LJp62ddkyT1p5fgSHI68PSq+kKS84GjqmodsDLJmmFqffQvSdpr5MGR5Bjg48D2JL8JTACbu9W3AuuHrB1onxuSTCaZ3LFjx9wdjCSplxHHRcA/ApcDLwTeyN6bJE4BK4ClQ9T2U1Ubq2q8qsbHxsbm9GAkabGb6ZXjwzgF2FhVDyf5FLAOWNKtW8YgvHYNUZMk9aiPN95/AU7slseB1ew9xbQW2A5sG6ImSepRHyOOa4BPJHk5cAyDeYrPJ1kJnAecyuD7zLfMsiZJ6tHIRxxV9d9V9VtVdUZVnVZV9zMIj7uBs6pqZ1VNzbY26v4lSY/Xx4hjP1X1GHs/HTV0TZLUHyeXJUlNDA5JUhODQ5LUxOCQJDUxOCRJTQwOSVITg0OS1MTgkCQ1MTgkSU0MDklSE4NDktTE4JAkNTE4JElNDA5JUhODQ5LUxOCQJDUxOCRJTQwOSVITg0OS1MTgkCQ1MTgkSU16C44kK5L8Q7d8TZKtSS6btn7WNUlSf/occVwBLElyPnBUVa0DViZZM0ytx/4lSfQUHEnOBr4PPAxMAJu7VbcC64esHWh/G5JMJpncsWPH3B2IJGn0wZHkWOBdwCVdaSnwYLc8BawYsrafqtpYVeNVNT42NjZ3ByNJ6mXEcQnwkar6r+7xLmBJt7ys62GYmiSpR3288Z4DvDHJ7cDzgZew9xTTWmA7sG2ImiSpR0ePegdVdcae5S48fgPYkmQlcB5wKlBD1CRJPer1VE9VTVTVFINJ7ruBs6pq5zC1PvuXJPUw4jiQqnqMvZ+OGromSeqPk8uSpCYGhySpicEhSWpicEiSmhgckqQmBockqYnBIUlqYnBIkpoYHJKkJgaHJKmJwSFJamJwSJKaGBySpCYGhySpicEhSWpicEiSmhgckqQmBockqYnBIUlqYnBIkpqMPDiS/GySLya5Jclnkxyb5JokW5NcNm27WdckSf3pY8Txu8CHqupc4GHg5cBRVbUOWJlkTZLzZ1vroX9J0jRHj3oHVfXRaQ/HgFcAV3aPbwXWA6cAm2dZ++d995lkA7ABYNWqVXNzIJIkoMc5jiSnAcuB7wIPduUp
YAWwdIjafqpqY1WNV9X42NjYHB+JJC1uvQRHkqcAVwEXA7uAJd2qZV0Pw9QkST3qY3L8WAanl95ZVfcD2xicYgJYC2wfsiZJ6tHI5ziA3wNeAFya5FLgWuDCJCuB84BTgQK2zLImSerRyEccVfXnVbW8qia6f5uACeBu4Kyq2llVU7Otjbp/SdLj9THi2E9VPcbeT0cNXZMk9cfJZUlSE4NDktTE4JAkNTE4JElNDA5JUhODQ5LUxOCQJDUxOCRJTeblAkBJc+OBPzx5vlvQYWjVu7450td3xCFJamJwSJKaGBySpCYGhySpicEhSWpicEiSmhgckqQmBockqYnBIUlqYnBIkpoYHJKkJgaHJKnJggyOJNck2ZrksvnuRZIWmwUXHEnOB46qqnXAyiRr5rsnSVpMUlXz3UOTJB8Gbq6qm5K8DDiuqq7dZ5sNwIbu4XOA7/Tc5pHseOCR+W5COgB/N+fWs6pq7EArFuL3cSwFHuyWp4Bn77tBVW0ENvbZ1GKRZLKqxue7D2lf/m72Z8GdqgJ2AUu65WUszGOQpAVrIb7pbgPWd8trge3z14okLT4L8VTV54AtSVYC5wGnzm87i46nAHW48nezJwtuchwgyXLgXOCOqnp4vvuRpMVkQQaHJGn+LMQ5DknSPDI4NGNesa/DWZIVSbbMdx+LgcGhGfGKfR3OunnPTQyu89KIGRyaqQlgc7d8K3s/Ei0dDnYDFzC4KFgjthA/jqv5ccgr9qX5UlVTAEnmu5VFwRGHZsor9iUB/s+vmfOKfUmAp6o0c5/DK/Yl4QWAauAV+5LA4JAkNXKOQ5LUxOCQJDUxOCRJTQwOHdGSLE3y2SRfSfLJNF4hluTKOerjuiRfTzKZ5DX7rLt9Bs9/epJLDrHNqiS3J7k1ycbWY5VmyuDQke5C4K6qOhP4IdD0ndRV9ZY57OX3gRcB707yvMY+Hq6qDxxis9cCr6+qs4ETgJNn16b0xAwOHekeBF6aZE1VvRq4L8lfJ7kjyUf2bNT9pf7BJF+a/uTpo4EkT0pyQzd6+XSSY5O8J8lEt/6V3b8lSW7s9vGZJP9/vVRVPQr8LXDGgZpN8uwkd3Wjhm1JVnf11Umum7bddUnelWRLd8fiJVV1aVV9q9vkqcAjSTYlOa17zrVJTu1e69Pd42u7dU9LcluSryb52Cz/W2uRMDh0RKuqLwB/AnwmyYeB1wP3VtUZwDOm/eV/KoORyYue4OVe0z33TOCfgIsPst1JwE+6fWxkcIuW6R4FnnyQ574deC+D62We9ETHBiyrqtOBrwOn7CkmuQC4r6oeAq4HfifJscBJVXV3t9lLgI9V1au6x6cD36yq9cDfJfG9QQflL4eOaN3t328Gng+MMThd9NJuJHEi8Mxu03ur6jOHeLmTgHu65XuAX9hn/Z57ef09cG+SLzN4g/7BPts9BfjPg+xjFYM3/d3ANw/Rz6bu5/eAYwGSnAi8FXhLt+424DTg14HPT3vul6eFCMAXgaOS3AI8r6p+coh9axEzOHSkezXw0u6N+F7gauDKqpoALgMe6LbbNYPXuo+9t1o5tXv8I+C4rvZr3c+1wJ1V9avAcgZ/zQOQ5MkMbtly60H28W/AyUmO6l7niXx/+oPuyv4bgIuraidAFwC3AFcAn5y2+b7Hexrwyao6Fzg7yc8dYt9axAwOHen+FHhlN8J4IfBnwHlJ7gBeB3y34bX+Anhu99w1wHUM/op/W5KrGZyCgsENIN+cZCvwdGCyq1/FYPTzjqr69kH28UHgEuBLzCzMpruEwYjlqm7O5syu/lfAA1X1wMGfyr8Clye5i8EI5v7GfWsR8ZYj0mGqmwx/T1VtH+I1fgW4HLi0qm6eo9a0yBkckqQmnqqSJDUxOCRJTQwOSVITg0OS1MTgkCQ1+T88Tjafg9W2bQAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Visualize the class balance of the label.\n",
    "# seaborn (sns) comes from the notebook's import cell, so all dependencies are declared in one place.\n",
    "sns.countplot(x='SeriousDlqin2yrs',data=df_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.06684"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Default rate: the label only takes the values 0 and 1, so its mean is the share of rows equal to 1.\n",
    "df_train['SeriousDlqin2yrs'].mean()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2.2缺失值的补全"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "SeriousDlqin2yrs                            0\n",
       "RevolvingUtilizationOfUnsecuredLines        0\n",
       "age                                         0\n",
       "NumberOfTime30-59DaysPastDueNotWorse        0\n",
       "DebtRatio                                   0\n",
       "MonthlyIncome                           29731\n",
       "NumberOfOpenCreditLinesAndLoans             0\n",
       "NumberOfTimes90DaysLate                     0\n",
       "NumberRealEstateLoansOrLines                0\n",
       "NumberOfTime60-89DaysPastDueNotWorse        0\n",
       "NumberOfDependents                       3924\n",
       "dtype: int64"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Check every column for missing values\n",
    "df_train.isna().sum()  # MonthlyIncome has 29731 missing values and NumberOfDependents has 3924, out of 150000 rows, so both need to be filled"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "SeriousDlqin2yrs                           0.066840\n",
       "RevolvingUtilizationOfUnsecuredLines       6.048438\n",
       "age                                       52.295207\n",
       "NumberOfTime30-59DaysPastDueNotWorse       0.421033\n",
       "DebtRatio                                353.005076\n",
       "MonthlyIncome                           6670.221237\n",
       "NumberOfOpenCreditLinesAndLoans            8.452760\n",
       "NumberOfTimes90DaysLate                    0.265973\n",
       "NumberRealEstateLoansOrLines               1.018240\n",
       "NumberOfTime60-89DaysPastDueNotWorse       0.240387\n",
       "NumberOfDependents                         0.757222\n",
       "dtype: float64"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_train.mean()  # mean of every column"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "SeriousDlqin2yrs                           0.000000\n",
       "RevolvingUtilizationOfUnsecuredLines       0.154181\n",
       "age                                       52.000000\n",
       "NumberOfTime30-59DaysPastDueNotWorse       0.000000\n",
       "DebtRatio                                  0.366508\n",
       "MonthlyIncome                           5400.000000\n",
       "NumberOfOpenCreditLinesAndLoans            8.000000\n",
       "NumberOfTimes90DaysLate                    0.000000\n",
       "NumberRealEstateLoansOrLines               1.000000\n",
       "NumberOfTime60-89DaysPastDueNotWorse       0.000000\n",
       "NumberOfDependents                         0.000000\n",
       "dtype: float64"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_train.median()#这个是中位数  #看看这两个数据的对对比  然后 填充缺失值的打算采用中位数的方式"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "SeriousDlqin2yrs                        0\n",
       "RevolvingUtilizationOfUnsecuredLines    0\n",
       "age                                     0\n",
       "NumberOfTime30-59DaysPastDueNotWorse    0\n",
       "DebtRatio                               0\n",
       "MonthlyIncome                           0\n",
       "NumberOfOpenCreditLinesAndLoans         0\n",
       "NumberOfTimes90DaysLate                 0\n",
       "NumberRealEstateLoansOrLines            0\n",
       "NumberOfTime60-89DaysPastDueNotWorse    0\n",
       "NumberOfDependents                      0\n",
       "dtype: int64"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#填充一下缺失值\n",
    "df_train['MonthlyIncome']=df_train['MonthlyIncome'].fillna(df_train['MonthlyIncome'].median())\n",
    "df_train['NumberOfDependents']=df_train['NumberOfDependents'].fillna(df_train['NumberOfDependents'].median())\n",
    "#填充完之后再看看是否有缺失值\n",
    "df_train.isna().sum()#这样缺失值就没有了 "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2.3数据分箱"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "#下面开始做数据分箱\n",
    "#对于age字段，分成6段 [-math.inf, 25, 40, 50, 60, 70,math.inf]\n",
    "#对于NumberOfDependents（家属人数）字段，分成6段[-math.inf,2,4,6,8,10,math.inf]\n",
    "#对于3种逾期次数，即NumberOfTime30-59DaysPastDueNotWorse， NumberOfTime60-89DaysPastDueNotWorse， NumberOfTimes90DaysLate，分成10段\n",
    "#[-math.inf,1,2,3,4,5,6,7,8,9,math.inf]\n",
    "#对于其余字段，即RevolvingUtilizationOfUnsecuredLines, DebtRatio,MonthlyIncome, NumberOfOpenCreditLinesAndLoans,NumberRealEstateLoansOrLines \n",
    "#分成5段"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "49     3837\n",
       "48     3806\n",
       "50     3753\n",
       "63     3719\n",
       "47     3719\n",
       "       ... \n",
       "101       3\n",
       "109       2\n",
       "107       1\n",
       "105       1\n",
       "0         1\n",
       "Name: age, Length: 86, dtype: int64"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#对于age字段，分成6段 [-math.inf, 25, 40, 50, 60, 70,math.inf] \n",
    "#在做之前 先看看年龄的分布\n",
    "df_train['age'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0        1\n",
       "21     183\n",
       "22     434\n",
       "23     641\n",
       "24     816\n",
       "      ... \n",
       "102      3\n",
       "103      3\n",
       "105      1\n",
       "107      1\n",
       "109      2\n",
       "Name: age, Length: 86, dtype: int64"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#按照年龄从小到大来排序\n",
    "df_train['age'].value_counts().sort_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/stu_15527388015/.local/lib/python3.7/site-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.\n",
      "  FutureWarning\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAABJMAAAHtCAYAAACtYz+SAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAA7/UlEQVR4nO3de7xtZVkv8N8jWwvBuzsUEylvmSma20uISiYKKiiX0tK8H7ygmR4LTdMsL2l2Me+koalpxMULoqQpB4o0IdH0RCdTMBWTOgrq8Yrv+WOMDZO551zj3Wzm3ovN9/v5rM+e813vM8czx3rWmHM/6x1jVmstAAAAANDjGjs6AQAAAACuOjSTAAAAAOimmQQAAABAN80kAAAAALppJgEAAADQTTMJAGA7qKqau79hR+UCALAtNJMAALZCVf1Ix5xdqmqXmfvXSHJ+VV13ZtrLquqoVeS4IJ+fGv/dvap+uTPmdVX1oIk5r6yqx1TVNapqr6q6SVU9oqpOHG//eFVdZ5xbs/sEALjq0kwCAK5UVXVeVe093j64qj5fVRdW1cO2Yw4Pq6pXbsX8U6vqs1V1zvjv/x1vn1NV36yq289MP3vme+dU1dfG53jpWJJPJjlkJuZWST7XWrt4ZuxeSU7ZlufZ+dxunOTMqvrxJN9O8ttVdceOmF9K8o8TD39xku8muXaS3x6/fjHJ7ZI8b7x/p3Hu85K8Ynz8R4w1cdbM15er6veuyHMEALYvy6sBgFX63SSPSvJ3Sa4/Nbmqfqe19jvbutHW2l8l+autCPl+kie01k6rqvsleWRr7TFjTqeN39/82D8zk29laBzdt7X2X4seuKqen2EfXG9sNH0gyZ8luW2SD45nv+2R5IDW2ke3Iudez0nyltbaF8d8npvkL6pq39ba/5vJ8wtJvpWhObRnkh/M5LfZLkne2lp7eVW9IMmPJblpkr2T7J5kr/HfGyS5Q5LrJnnWGPv6DI240zPsz3e31p4ws/3fSXLJlfrMAYCV0EwCAFbpBkn+o7XWknytY/4LkvzOSjNa7HtJ3lBV30hynSQ3qqqzxu/dNskPN0+sqlsleed495pJbpnkA3NNl3e01v5wvH2TJM9qrb1rbFT9aobm0m+01o4ZH/OvMzRxrlRV9XNJfjnJpQ2w1tpJVfWQJO+vqge31r4xju81xvxkkg8nuXOSRyc5s7W2aIXSYRlWVl2U5D5Jfj/DftyU5MAkv5ekktwmydmttQur6vHjnI1LUm7b9owBgO3BaW4AwDYZr5fzuqq6oKrenqHB8uCq+kqSmyf5eFV9paquvcZjvGycn3HuZ2a+9+aqenJV/XlV/dvM+EPHU9K+XFWvmnu8x1TVm2fu7z2efvfM8fE/VVU3mQm5RpInttY2JTkqycmttU3j/bNz+fdMuyT55vi9f0myz3j7m0n2T/InGRpIm/0wl3e9JI/JZQ2pjPvsu2s9r6rav6o+V1XnVtVbquoD4/jdxtPEvlRVx4yrpVJVN0vyjiRPbq3937kcnpDkC0k+XVUHzWzjWknemuSZrbWvZVhddKO52M3zfjjmfF6SZ2Y4Le7hGZpr/zbe/uUMjaUkSWvtg621/zXuw0W8NwWAqwAv2ADAtjoiyc8m+YkkJ2Y4Rerk1tpNkvxHkru21m4ye0rVvNba0eP8jHNvPzflOUn+PsndZ8aemOH6PHsl2b+qfnoizz0zNHn2zNBImb0Q9Y8keeN4Gtrrkxwyc/2jTRmaPZvNn4r1zqr6uwwreZbNmfX1DI2Xl8w0tK6Vy1YmLXteL0vy5CRPT3LL1tqBY1Pn7RlWEO2V4Wfw0HH+MUn+IMkrxusTfbeqvlhVX0xyYYYVQn+aYSVWquqa42PdI8lLx+f+0CSvHvfFF6rqFeNj75GZZlhr7V8y/KzvMu6vzV/faa29ePbJV9XPJvlGkrtX1TfGRth3kzx4zAsAWOc0kwCAbbVvkuNba99prZ2QoVly
ZTultfamuRU2j8twEet3JLl1huv3rKUl+Z3W2g8zrDa63qXfaO2g1tqtWmt3SvKkJO9prd1p/Nq9tfaZxQ+ZJHl4a22/JJ9YY84fzzSq0lr7WIaVTE8fv/+jSb4z8by+k6HpdK1ctrLnthmuV/S3Sb6UoZmzufl0aGvtNa21W2doNp3SWvvx1tqPZ2hMnd9a+8PW2nFVdaMMp6z9MMnfJHnYuC9OSnLUePv5GRterbX/aK29bO457p3k+CTPHr/ekGGV0qWq6j5J3pLk/UkeP+Z9jyQXjCvBXrfGPgQA1gnXTAIAtlXl8te6mT+t68pwuQtTV9X1k5yVYeXNi7P8GjyzvjKzOupy1+apqs2NoEuy5TWTrp2hCXXcksd9Z1V9J8MpYcs8Y+aaSY8cx/4kw6esPXfcxsUTz+szSV6a4f3b4zennuSzrbXbjc/jRzM2mlpr35uJvU+Sj83cv1ku3/z6f0nelaHZ9e41nsda1zT6QYbm0Q/G+zednV9V10jyRxlOoWvjiqvz13g8AGCd0kwCALbVPyY5ary+z4FJbrgNj/XfVXWLJF9Ocu3W2kVL5t0qQ+PntUn2ybAiZ8rSRkhr7dJT1OY/za3Dw1trnx0/9a1ba+0rVXWH1toPq+o6GVYq3SULnldV7ZLkkCS3mTtd8Nwk166qe2U4DfCtGT4575Uzz+dmGa7RdMeZuD2TvHcml28nec04f4uV6+P298hljaJl9s9lp/tdO8nnZr73xAwrkD44XtfpKUl+fW47N0pSyz4ZDwBYH5zmBgBsq3dmuBD1FzKcIvaVbXis38zQFLkgl29+zPtkknMynNr1u0n+OcOnhm218bpDU3M2VNXmP8JVkruMK5fun+Sk8fZdkpw+5jP7HuuaufxpbrPXX/p+Vf1MkkvGT7xb+Lxaa5eM418Yr1304bER9b0kD0vyqjHmO0kuPVWsqm6X4ZPZnt9a+8+q2rWqbj7mOtvomfUjM7c3ZFjpdGySZ2Q4BW7WtWaea2VYgbXfeNrfw2fy+JEMP9v/Oe7vP0vyxdbamRmafJt/Bj+f5G1L8gIA1gkrkwCAbTI2Oh6/5Ht7b+Vj/XmSP58be8yCed/PsFJn2eO8OcmbZ+6fl+GaPpvv/87M9H+tqpbhgtSXqqpzZ+7ukuHUs2MyNIPObq3tv2jbVfXIDBef3uzEJH8wrl66SYbrIG1eAfSGDM23w9d6XlW1+SLgGzM0bZ6V4dpKz2itfTTJnRbEHJFhtdEzW2tvH4f3SXJchmtcfXZR/kkOSvL98fY1MzR6Hj/mNu+auayZNN+Ue0mGVWtprX23qm6f5LpJ/j1Dw/AR4/cuqapPj8223cbnBgCsYzX8EQwAYPWq6uNJbr7gW7dorX13wThJqup6GVbsbMpwGtx5SZ7QWjt3jZhKcr3W2te3R469quq2rbV/3dF5AABXnGYSAAAAAN1cMwkAAACAbppJAAAAAHS7Sl+A+8Y3vnHbe++9d3QaAAAAADuNs88++79aaxuXfX+lzaSq2iPJB1prd66qNyW5XZJTWmsvGr/fNbbM3nvvnbPOOmuVTwEAAADgaqWqzl/r+6s+ze0VSXatqsOS7NJa2zfJnlV1696xFecHAAAAwFZYWTOpqu6b5FtJvpJk/yTHjd/6cJL9tmIMAAAAgHViJc2kqrpWkucnefY4tFuSL423L06yx1aMzT/2kVV1VlWddeGFF64ifQAAAACWWNXKpGcneU1r7evj/W8m2XW8vfu43d6xy2mtHdNa29Ra27Rx49JrQQEAAACwAqtqJt0vyVFVdVqSOyU5OJedsrZPkvOSnN05BgAAAMA6sZJPc2ut3Xvz7bGhdEiSM6pqzyQHJblHktY5BgAAAMA6sepPc0trbf/W2sUZLq790SQ/31q7qHds1fkBAAAA0G8lK5MWaa19LZd9UttWjQEAAACwPqx8ZRIAAAAAOw/NJAAAAAC6aSYBAAAA0E0zCQAAAIBumkkAAAAAdNNMAgAA
AKCbZhIAAAAA3TSTAAAAAOimmQQAAABAN80kAAAAALppJgEAAADQbcOOTgAAlnnEuw7smvf2h35gxZkAAACbWZkEAAAAQDfNJAAAAAC6aSYBAAAA0M01kwDYLo46se/6R685zPWPAABgPbMyCQAAAIBumkkAAAAAdNNMAgAAAKCbZhIAAAAA3VyAG4CdykHveWjXvPcf8q6V5gEAADsrK5MAAAAA6KaZBAAAAEA3zSQAAAAAurlmEsDV3AuOO7B77gt/6QNJkt88vi/m5Ud84ArlBDuzBx//9q55Jx/xiBVnAgBwxViZBAAAAEA3zSQAAAAAumkmAQAAANBNMwkAAACAbppJAAAAAHTTTAIAAACgm2YSAAAAAN027OgEAODq4oHv+q2ueac89CUrzgQAAK44K5MAAAAA6GZlEgBXewe9+wld897/kDeuOBMAAFj/rEwCAAAAoJuVSQBwBRz0rmd0zXv/Q/94xZkAAMD2ZWUSAAAAAN2sTAIA8qATX9E1732HPWvFmQAAsN5ZmQQAAABANyuTAACuoAcf/xdd804+4lErzgQAYPvRTAKAdeyBJ72wa94ph75gxZkAAMDAaW4AAAAAdNNMAgAAAKDbyppJVXXDqjqgqm68qm0AAAAAsH2t5JpJVXXTJCcmOTnJH1XVfZOcneRz45Sntdb+uapemOSBST7WWnvqGLvFGADAqj34hGO75p18+GNXnAkAwPq2qpVJt0/yjNbai5OcmuRxSd7RWtt//PrnqtqUZL8kd0vyxaq636KxFeUHAAAAwBWwkpVJrbUPJUlV3TtDY+j4JIdW1T2TnJ/k0UnuneSE1lqrqg8lOTjJRQvGPrSKHAF2Ri955wO65/7Ww09dYSYAAMDOapXXTKokD0vy/SSfTHKf1tp+Sb6e4TS23ZJ8aZx+cZI9lozNP+6RVXVWVZ114YUXrip9AAAAABZYWTOpDY5KcmaSm7TWLhi/dW6SWyf5ZpJdx7Hdx1wWjc0/7jGttU2ttU0bN25cVfoAAAAALLCSZlJVHV1VjxrvXj/J66tqn6raJcmhGVYqnZ3h+khJsk+S85aMAQAAALBOrOSaSUmOSXJcVT0hyaczXB/p7UkqyXtaax+qqmskeWlVvTLJgePX+QvGAAAAAFgnVnUB7q8lOWBu+I5zc344flrbg5K8srX2+SRZNAYAAADA+rCqlUldWmvfzvBJb2uOAQAAALA+7NBmEgBw9fKgE1/TNe99hx214kwAALiiVvZpbgAAAADsfDSTAAAAAOjmNDcA2Mk88KSXds075dDnrDgTAAB2RlYmAQAAANBNMwkAAACAbppJAAAAAHRzzSQA4Ap50Imv7Jr3vsOevuJMAADYnqxMAgAAAKCbZhIAAAAA3TSTAAAAAOimmQQAAABAN80kAAAAALppJgEAAADQTTMJAAAAgG6aSQAAAAB027CjEwBgsVe84wHdc5/1y6euMBMAAIDLaCYBAOvag054fde89x3+pBVnAgBA4jQ3AAAAALaCZhIAAAAA3ZzmBgDsdB50whu75r3v8CesOBMAgJ2PlUkAAAAAdNNMAgAAAKCbZhIAAAAA3TSTAAAAAOjmAtwA28Gfvv0B3XN/7RGnrjATAACAbWNlEgAAAADdNJMAAAAA6KaZBAAAAEA3zSQAAAAAumkmAQAAANBNMwkAAACAbppJAAAAAHTTTAIAAACg24YdnQDAjvRnf/GA7rn/41GnrjATAACAqwYrkwAAAADoppkEAAAAQDenuQE7jWPfcv/uuY999N+sMBMAAICdl5VJAAAAAHSzMglgK732bf0X7X7KI120GwAA2LlYmQQAAABANyuTAADWuQcf/86ueScf8fAVZwIAYGUSAAAAAFtBMwkAAACAbitrJlXVDavqgKq68aq2AQAAAMD2tZJmUlXdNMn7ktwtyUeqamNVvamqzqyq583M6xoDAAAAYH1Y1cqk2yd5RmvtxUlOTXLfJLu01vZNsmdV3bqqDusZ
W1F+AAAAAFwBK/k0t9bah5Kkqu6dYXXSDZMcN377w0n2S3LnzrF/W0WOAAAAAGy9VV4zqZI8LMn3k1SSL43fujjJHkl26xybf9wjq+qsqjrrwgsvXFX6AAAAACywkpVJSdJaa0mOqqrfS3JEkl3Hb+2eoYn1zc6x+cc9JskxSbJp06a2qvyBHettb35A99xHPubUFWYCAADArFVdgPvoqnrUePf6SX4/wylrSbJPkvOSnN05BgAAAMA6saqVScckOa6qnpDk00neleT0qtozyUFJ7pGkJTmjYwwAAACAdWJVF+D+WpIDZseqav9x7OWttYu2ZgwAAACA9WFl10yaNzaYjrsiYwAAAACsDyv7NDcAAAAAdj6aSQAAAAB000wCAAAAoJtmEgAAAADdNJMAAAAA6KaZBAAAAEA3zSQAAAAAumkmAQAAANBNMwkAAACAbppJAAAAAHTTTAIAAACgm2YSAAAAAN00kwAAAADoppkEAAAAQDfNJAAAAAC6aSYBAAAA0E0zCQAAAIBumkkAAAAAdNNMAgAAAKCbZhIAAAAA3TSTAAAAAOimmQQAAABAN80kAAAAALppJgEAAADQTTMJAAAAgG6aSQAAAAB000wCAAAAoJtmEgAAAADdNJMAAAAA6KaZBAAAAEA3zSQAAAAAumkmAQAAANBNMwkAAACAbht2dALAzu+vjj2we+7DHvuBFWYCAADAtrIyCQAAAIBumkkAAAAAdNNMAgAAAKCbayYBAOyEHnz8X3fNO/mIX1xxJgDAzsbKJAAAAAC6aSYBAAAA0E0zCQAAAIBumkkAAAAAdNNMAgAAAKCbZhIAAAAA3TSTAAAAAOi2YRUPWlXXS/LO8fG/meRhST6b5HPjlKe11v65ql6Y5IFJPtZae+oYu8UYsH6ccOyB3XMPf+wHVpgJAAAAO8KqViY9IskftdYOSPKVJM9O8o7W2v7j1z9X1aYk+yW5W5IvVtX9Fo2tKD8AAAAAroCVrExqrb125u7GJP+R5NCqumeS85M8Osm9k5zQWmtV9aEkBye5aMHYh1aRIwAA2+6Q49/TNe89Rxyy4kwAgO1lJc2kzarq55LcIMkHkxzbWrugql6T4TS23ZL8+zj14iR7JPnBgrH5xzwyyZFJstdee60yfQCAq5WDjz+xa957jzhsxZkAAOvZyi7AXVU3TPKqJI9L8qnW2gXjt85NcusM11LadRzbfcxl0djltNaOaa1taq1t2rhx46rSBwAAAGCBlTSTqupaSY5L8pzW2vlJ3lpV+1TVLkkOTfLJJGdnuD5SkuyT5LwlYwAAAACsE6s6ze3xSe6S5LlV9dwkH0ny1iSV5D2ttQ9V1TWSvLSqXpnkwPHr/AVjAAAAAKwTq7oA9+uSvG5u+IVzc344flrbg5K8srX2+SRZNAYAAADA+rDSC3BPaa19O8nxU2MAAAAArA8ruwA3AAAAADsfzSQAAAAAumkmAQAAANBNMwkAAACAbppJAAAAAHTTTAIAAACgm2YSAAAAAN027OgEgB3nPX9+UPfcQx73/hVmAgAAwFWFlUkAAAAAdNNMAgAAAKCbZhIAAAAA3TSTAAAAAOimmQQAAABAN80kAAAAALppJgEAAADQTTMJAAAAgG6aSQAAAAB000wCAAAAoJtmEgAAAADdNJMAAAAA6KaZBAAAAEC3DTs6AeDKccqbHtg994GPP2WFmQAAALAz00yCFTvtzx7UPXf///G+FWYCAOvDIcf3vd6954j+11AAYPtxmhsAAAAA3TSTAAAAAOimmQQAAABAN80kAAAAALppJgEAAADQTTMJAAAAgG6aSQAAAAB000wCAAAAoNuGHZ0AsKUPvvGB3XMPeMIpK8wEAAAALs/KJAAAAAC6aSYBAAAA0E0zCQAAAIBumkkAAAAAdNNMAgAAAKCbZhIAAAAA3TSTAAAAAOjW3UyqqgcsGLvLlZsOAAAAAOvZhrW+WVV7JrkkyfeTPL2qzkuyS5KLk9wlyVOT
HLDiHAEAAABYJ9ZsJiU5K0lL8pcZGkgvT/KzSY5Ncp8kF600OwAAAADWlalm0r9maCadm+RmSU5Pct0k/5Xkh6tNDdafvz/mwd1z73nkySvMBAAAAHaMqWbSZi1JJbl1kh/LcIrbHkmuvaK8YOX+8Q0Hd8+92xPfu8JMAAAA4Krjin6aW5v5AgAAAOBqYmubSf8nyX8m+ackXx2/tlBV16uq91fVB6vqpKq6VlW9qarOrKrnzczrGgMAAABgfZhqJv1UktsluX2G09zuk+Q2SW6c4VPdlnlEkj9qrR2Q5CtJHp5kl9bavkn2rKpbV9VhPWPb8uQAAAAAuHJNXTPpLkm+l+SSJO9I8usZGlDfSPK5JI9cFNRae+3M3Y3jvD8Z7384yX5J7pzkuI6xf5t97Ko6MsmRSbLXXntNpA8AAADAlWnNZlJr7ctJUlWbkry9tXbuzLffXFWfXCu+qn4uyQ2SnJfkS+PwxUlulWS3zrH5nI5JckySbNq0yTWbAAAAALajyWsmVVUleXeSO1fVU6vqruP4XZK8do24GyZ5VZLHJflmkl3Hb+0+brd3DAAAAIB1Ys1mTVVVa60l+WySN2S4APdBVXV2klcn+cUlcdfKcLrac1pr5yc5O8Mpa0myT4aVSr1jAAAAAKwTU9dMen9VfTPJ9TKccnaXJJuSvDfDRbk3JvnigrjHj3OfW1XPTXJskl+tqj2THJTkHklakjM6xgAAAABYJ6aaSUck2TvJUUl+N8kFSR7SWrukqvZO8udV9Qvj6qVLtdZel+R1s2NV9Z4kByR5eWvtonFs/54xAACu3h5y/Kld8959xANWnAkAMNVMekqGFUn/neRTGT6R7fNV9fYkP5Pk6PlG0jKtta/lsk9q26oxAAAAANaHqWbSdcd/983wKWsbknwyyTlJHpzkMyvLDLbCJ15/cPfcOz/pvSvMBAAAAHZuU5+W9sEkn09y5yTnJjk0w4WxD0zy8iS/tdLsAAAAAFhXpppJ903yvQyf3HbbJG/N8MluH22tvTXJLatq6jEAAAAA2EmseZpba+0FVbVbhk9zu2Sc/7zW2pnjlCe31n644hwBAAAAWCemrpmU1tq3knxrZuhLM9/7+gpyAgAAAGCdcooaAAAAAN00kwAAAADoppkEAAAAQDfNJAAAAAC6aSYBAAAA0E0zCQAAAIBumkkAAAAAdNNMAgAAAKCbZhIAAAAA3TSTAAAAAOimmQQAAABAN80kAAAAALppJgEAAADQbcOOTgAAAFbhocf/bde8dx3xCyvOBAB2LlYmAQAAANBNMwkAAACAbppJAAAAAHTTTAIAAACgm2YSAAAAAN00kwAAAADoppkEAAAAQLcNOzoBWOQzrz2ka97tn/KeFWcCAAAAzLIyCQAAAIBumkkAAAAAdNNMAgAAAKCbZhIAAAAA3VyAGwAAtsFhJ/x917wTD7/nijMBgO1DMwkAAEaHnvC/uuaddPh9VpwJAKxfTnMDAAAAoJtmEgAAAADdNJMAAAAA6KaZBAAAAEA3zSQAAAAAumkmAQAAANBNMwkAAACAbppJAAAAAHTbsKMTYOf32Vc/pGverZ767hVnAgAAAGwrK5MAAAAA6KaZBAAAAEA3zSQAAAAAuq3smklVtUeS41tr96qqmyX5WJLPjt/+xdbahVX1piS3S3JKa+1FY9wWYwAAsLM4/IR/7J57wuF3W2EmAHDFrGRlUlXdIMlbkuw2Dt09yYtba/uPXxdW1WFJdmmt7Ztkz6q69aKxVeQHAAAAwBWzqtPcLknysCQXj/fvkeQpVfUPVfXH49j+SY4bb384yX5Lxi6nqo6sqrOq6qwLL7xwNdkDAAAAsNBKmkmttYtbaxfNDL0/yb6ttZ9LcpuqumOGVUtfGr9/cZI9lozNP/YxrbVNrbVNGzduXEX6AAAAACyxsmsmzTmztfbd8fa5SW6d5JtJdh3Hds/Q2Fo0BgAAAMA6sb2aNadW1U2r6tpJHpDk00nOzmWn
se2T5LwlYwAAAACsE9trZdILk3wkyfeSvL619q9VdUGSM6pqzyQHZbiuUlswBgAAAMA6sdJmUmtt//HfjyT5qbnvXVxV+yc5IMnLN19jadEYAAAAAOvD9lqZtFBr7Wu57NPblo4BAMDV2REnnNM99/jD77SyPAAgcYFrAAAAALaCZhIAAAAA3TSTAAAAAOimmQQAAABAN80kAAAAALppJgEAAADQTTMJAAAAgG6aSQAAAAB000wCAAAAoJtmEgAAAADdNJMAAAAA6LZhRyfAVcsXXvXwrnl7Pe2dK84EAAAA2BGsTAIAAACgm2YSAAAAAN00kwAAAADoppkEAAAAQDfNJAAAAAC6aSYBAAAA0E0zCQAAAIBumkkAAAAAdNNMAgAAAKDbhh2dAAAAcOX7pRP+d/fc4w7/6RVmAsDOxsokAAAAALppJgEAAADQTTMJAAAAgG6aSQAAAAB000wCAAAAoJtPc7sa+9JrntY172ZHvWrFmQAAAABXFZpJAABAkuRhJ/5799y/OuyWK8wEgPXMaW4AAAAAdNNMAgAAAKCbZhIAAAAA3TSTAAAAAOimmQQAAABAN80kAAAAALppJgEAAADQTTMJAAAAgG4bdnQCAADAVdcTT/xC99w3HLbXCjMBYHuxMgkAAACAbppJAAAAAHTTTAIAAACgm2YSAAAAAN1cgBsAANiunnPSl7rnvvTQm60wEwCuCCuTAAAAAOimmQQAAABAt5U1k6pqj6o6Y7x9zao6uarOrKrHbc0YAAAAAOvHSppJVXWDJG9Jsts49LQkZ7XW9k3y4Kq6zlaMAQAAALBOrGpl0iVJHpbk4vH+/kmOG2+fmWTTVoxdTlUdWVVnVdVZF1544QpSBwAAAGCZlTSTWmsXt9YumhnaLcnmj2y4OMkeWzE2/9jHtNY2tdY2bdy4cRXpAwAAALDEhu20nW8m2TXJRUl2H+/3jgEAAFdzLz3pgu65zzn0pivMBIDt9WluZyfZb7y9T5LztmIMAAAAgHVie61MekuSU6rqXkl+OsnHMpzO1jMGAAAAwDqx0mZSa23/8d/zq+qADKuOnt9auyRJ7xgdLnjt87rm3fQpL1pxJgAAAMDObHutTEpr7cu57JPatmoMAAAAgPVhe10zCQAAAICdgGYSAAAAAN00kwAAAADoppkEAAAAQDfNJAAAAAC6aSYBAAAA0E0zCQAAAIBuG3Z0AgAAAKvwqpP+s3vu0w7dY4WZAOxcrEwCAAAAoJtmEgAAAADdNJMAAAAA6KaZBAAAAEA3zSQAAAAAuvk0t3XoP1/30q55ezz5OSvOBAAAAODyrEwCAAAAoJtmEgAAAADdNJMAAAAA6KaZBAAAAEA3F+AGAAAYvenEr3bPffxhP7bCTADWLyuTAAAAAOimmQQAAABAN80kAAAAALppJgEAAADQTTMJAAAAgG6aSQAAAAB000wCAAAAoJtmEgAAAADdNJMAAAAA6KaZBAAAAEA3zSQAAAAAumkmAQAAANBNMwkAAACAbht2dAI7u6++/pVd837sSU9fcSYAAMAq/OUJF3bP/ZXDN64wE4Dtw8okAAAAALppJgEAAADQTTMJAAAAgG6aSQAAAAB000wCAAAAoJtmEgAAAADdNJMAAAAA6KaZBAAAAEA3zSQAAAAAumkmAQAAANBNMwkAAACAbppJAAAAAHTbLs2kqtpQVV+oqtPGrztU1Qur6uNV9eqZeVuMAQAAALB+bNhO27ljkne01o5OkqralGS/JHdLcnRV3S/J1+fHWmsf2k75AQAAbDcnHv9f3XMPO+LGK8wEYOttr2bSPZIcWlX3THJ+kk8mOaG11qrqQ0kOTnLRgrEtmklVdWSSI5Nkr7322k7pAwAAAJBsv2smfTzJfVpr+2VYgbRrki+N37s4yR5JdlswtoXW2jGttU2ttU0bN25cadIAAAAAXN72Wpn0qdbad8fb5ya5VoaGUpLsnqGp9c0FYwAAAFxBf/OO/tPp7v/LTqcD
+myvZtJbq+rFST6d5NAkp2W4PtI7k+yT5LwkZyf5pbkxAAAAkpx8XH9j6MG/pDEErM72aib9bpK/TFJJ3pPkRUnOqKpXJjlw/Do/yUvnxgAAAABYR7ZLM6m19ukMn+h2qfET3B6U5JWttc8vGwMAAABg/dheK5O20Fr7dpLjp8YAAAAAWD9c5BoAAACAbjtsZdJV0YWvf0PXvI1PeuKKMwEAAADYMaxMAgAAAKCblUkAAABc6rS3X9g9d/9HbFxhJsB6ZWUSAAAAAN00kwAAAADoppkEAAAAQDfNJAAAAAC6aSYBAAAA0M2nuQEAALBN/v4v+j8B7p6P8glwcFVnZRIAAAAA3TSTAAAAAOimmQQAAABAN80kAAAAALq5ADcAAADb3ceP/Wr33Ls+9sdWmAmwtaxMAgAAAKCbZhIAAAAA3TSTAAAAAOjmmkkAAABcJZzzZ/3XWbrT/3CdJVgVK5MAAAAA6KaZBAAAAEA3zSQAAAAAumkmAQAAANBNMwkAAACAbppJAAAAAHTTTAIAAACg24YdnQAAAACsyv9+/X92zfvpJ+2x4kxg52FlEgAAAADdrrYrky58/bFd8zY+6bErzgQAAADgqsPKJAAAAAC6aSYBAAAA0O1qe5obAAAALPLZV/VdtPtWT3PRbq6erEwCAAAAoJtmEgAAAADdNJMAAAAA6KaZBAAAAEA3zSQAAAAAuvk0NwAAANhGX/ijr3TN2+uZN1lxJrB6mkkAAACwA1zwsi93zbvp0XuuOBPYOk5zAwAAAKCbZhIAAAAA3TSTAAAAAOjmmkkAAABwFfGVPziva95NfmPvlebB1ZtmEgAAAOzEvvKH/6dr3k3+521WnAk7C6e5AQAAANBtXa5Mqqo3JbldklNaay/a0fkAAADA1clX/ujTXfNu8syfWXEmrEfrrplUVYcl2aW1tm9Vvbaqbt1a+7cdnRcAAACw3H/+8Se65u3xjDtv23ZeeWbfdp6+7zZth+Wqtbajc7icqvrTJB9orZ1SVUckuU5r7diZ7x+Z5Mjx7m2T/OuSh7pxkv/ays2LESNmfces17zEiBEjRowYMWJ2xDbEiBEjZlUxt2itbVwa1VpbV19J3pRkn/H2/ZM8+wo+zllixIjZuWLWa15ixIgRI0aMGDFXlbzEiBEjZltjWmvr8gLc30yy63h797hIOAAAAMC6sR4bNWcn2W+8vU+S83ZcKgAAAADMWncX4E7yriRnVNWeSQ5Kco8r+DjHiBEjZqeLWa95iREjRowYMWLE7IhtiBEjRsyOiFl/F+BOkqq6QZIDkpzeWvvKjs4HAAAAgMG6bCYBAAAAsD6tx2sm7fSq6oZVdUBV3XhH58IV+3lcGTE9j7Gjcruqx1yV7OzPj2lVtVtV/UJV/fiOzoUdx7GARB0wUAeoAa4KrrbNpKq6XlW9v6o+WFUnVdW1qmqPqjpjK2K2eIyOmFskeV+SuyX5SFVt7MltHN+jqj6xFc/nC1V12vh1h63Yzmur6uDO7Tx9ZhvnVNUbOmL2qKpTquqMqnp953ZuW1XvG2P+cFHMGLdVB96qumnmfh4ddTAfc4uOOpiPufn8dntyG8fXqoNFz2eqDpZtZ606mI/57Y46WJTbVB3Mx9y1pw6uqNl9O1UH8zE9x4MFMQv3/Vp5Lbo/sY0NUzWwxnaW1sCC7Tx5qgYWxNxgqgYWxPzEKmpg0X6qqmuvtZ+XxLyzqk6tqg/XcMr2VMw+ST6Q5OeSvLeqbt+znXF816r6963I7RMz9w/Yiu38ZlU9rXM7s8eCj1bVqT37oKqOG2+fWFXX7Ii5T1X99Xj7LxbFbIvZ2t+KY8Frq+rgrTwWbI7pOhbM5zaT39TxYPN2tuZ4ML+dnuPB5u1szfFgc8zWHA82x6zqeLBF/lN1sCDm2Kk6WLKfpt4bLNy3a9XBgpg3TdXBGttZ673BfMx3p+pgyb6eem8wH3Pqiupgi5rsqIPLxfQcDxbETB4Plv2+TNTB/HYmjwdr
bGetOpjfzuTxYEHMwu1OxPzElV0Hix6zowYuF9NZA/MxPTWw8PlO1MD8dnpqYNl21qqB+e301MB8TE8NzMdc6TWwKlV106q6X1VdZ0fnsk1aazvdV5I3JTkzyfPWmPOUJAeMt1+X5NEZ3tD/01bE/Nrc/UM6Yg5Jco/x/iuSPKAnZrz91iTndub2/CQvm9hPi3K7V5ITtyZm5nuvSnKXzv32K+P9tyfZ1BHzf2f2218l2X9BzE2T/EOS5yb55yQbp2ohyf3mfh4P66iD+Zind9TBfMwDOupgi5iOOpiPObqjDhblNlUHC3ObqINF+22qDuZjvjZVBzOxeyT5xHh78pgwu2+T3GCqDhbELP3dWCNm6X5cNH/Z/Ylt/OxUDSx63KkaWCuXZTWwILfJY8GCmOOmaiDDh0t8Iclp49cdkrwwyceTvHrJ419uPyXZJcnJSc5bI6f5mEOSPHa8fXSSX+uI2SfJg8fbT03yxKmYmfEXJ/l2Z243SvLOiZ/lFttJcqskf5dkl96Yme/9RpLDOvfbb808pyM6Yl6e5OHj7Zclecwaz+u1SQ4eb/e8P7i09tN5LJiL6ToWzMX0Hgu2+L3MxPFgbjtdx4P57Szabk9u4/jS48Fcbl3Hg7mYnuPBk3PZseCcJG/oqYP5/HvqYC7muT11MBdzdE8dLNq3U3Ww4Pl0vS7MxXS9Liz6ua9VB3NzXtBTB3MxH+yogxskOSXJGUleP45NvUecr8m7TtXBgpjnT9XBgphnTdXBst+XtepgQczdpupg0Xam6mBZbmvVwZL9NvUecT7mw2vVQZKfyNCgOSPJH3bWwPwx5tCOGpiPmXxNWBAz+ZqwIGb/jhqYj3lmRw1ssZ2OGlh6bF6jBuZjev6/OB/z1bVqYBzfI8kZ4+1rZnivd2aSx83MuV2Sd/fMT7JXhteXD2e4iHV1xNwxyekZjnf/lORaHTE3S/LFXPZ6trEj5oUz889N8pyOmJ9N8qFx7H+uVRuttZ1vZVJVHZbhTe++Sfasqlsvmtdae21r7YPj3Y1JPp+hkXDxssdeEPOPc/e/2hHz1dbaR6vq3hkO4v/QE1NV903yrSQLL0i+IOYHSQ6tqr+rqrdX1Raf3Lcg5mtJ/izJeVX1kM7tfDVJqupmSfZorZ3dEXOdJLetqusnuXmG//RNxVw7wy9bxm1eb0F6t0/yjNbai5OcmuS+maiF1tqH5n4e7890HczHHNtRB/Mx/9BRB1vEdNTBfMy3M10H8zEfzXQdbJFbMlkH8zFfz3QdzMd8LdN1sNkrkuzae0yY27eXZKIO5mOW/W5MxCzcj2vktcX9judyj0zUwHxMDas81qyBZbmsVQMLYv47EzWwIOY2ma6BOyZ5R2tt/9ba/kl+JMl+GfbxF6vqfgtiLrefMjSkjkxy3pKnvyjmlNbaseP3ltXAfMxnWmsnV9WdM7xJ/ZupmPEviT+VoUn2sc7c7plk3/Gvde+rquv2bCfDf7z/T5JfrqpdOmNSVbsmuX9r7cSO3C5Kcuuq2j3DcfzfOmJ+Oh3Hgqq6V5KbtNbe23MsWFD7k8eC+ZieY8GCmJ5jwRa/l1PHgwUxk8eD+Zie48GyOWsdDxbETB4PFsRMHg9aa6+bORackaGeJ18TZvPPUJOTrwlzz/nFPa8JczEvm6qDBTFn97wuzD2fu6fjdWEu5lPpeF2Yz23R/YntfDYdrwtzMRszfTz41SRva63dK8l1quo3M10H8zX5n5mug/mY13fUwXzMX3TUwRa/Lx11MB9zYKbrYD7mPzJdBwt/lyfqYD7m/EWPMRFzk6xdBy9L8ntjDfx4z2tCtjzG/DDTNTAf86WOGpiP2a2jBrY4/nXUwHzMJZmugUXH2akaWHhsnqiB+ZiLMl0D8zHfWrTdzWpYMf6WJLuNQ09LctZYAw+uqutU1S2T/EGG/Tk5P8kTkzy5tXbfMc99O2J+OsMfH1+Y5HMZ/rA4FXP3JC+eeT37wVRMa+0FM/P/Ocm7
O7bzqiSPzfC+8fCq+okF+/1SO10zKUO39Ljx9ocz/Adiqar6uSQ3aK2d3lq7qGcDMzEfXXR/KqaqKsOB6PsZfonXjMnwS/H8JM/uzS3DX2nu01rbL8N/2h/YEXObJP87w19871ZLTmuYfz7j0FEZuu09ub0tya0zdJzPzdAgmIp5UZIX1LCc8sAkfzs/d8Gb8Qekoxbmfx49dbDoZzhVB/MxPXUwN6fSUQdzMZ9MRx3MxTwyHXWwJP8162Au5rR01MFczFsyUQdjzOyL6f6ZqIMalhxfum9baxdP1cF8zMz40jpYFLNWHczPX7bNiW18PBM1sCDmUZmogTVyWVoDC2L+LhM1sCDm+EzXwHzj4b5JTmittQx/bbnXgpj5/fSA1tqXFz2PNWIeOOb8k5u32RuT5OAML+7f7ozZvLqvN7e9k/zC+Cb6tCSP6Yh5UIZG/vOT7J6hHnqfz68m+cvO3G6R4a9yv5bh9/ZzHTF/neTZNZyu9/gMb5AuZ0HjYf9MvyZcrvaTPLrjNWHh78vEa8IWMR2vCfMxT8/0a8L887lRpl8T5mOesuj59eyDrP2aML+dW2X6NWE+5oR0vCYkl2s83CL97xOPSvK6nteE+ZiZ7U6+R5yN6X2PuDmm53VhwXYmXxcWxEy+Lix6PkvurxUz+bqwIKbndWG+8bB3putgPpcLOupgYf4TdbBFTEcdzMdclOk6mI95f6brYD7moZmug2U/w7XqYD7mtCWPsVbMO7J2Hcw3Hv4w0zWwRW111MDCepyogS1iOmpgPuYjma6B+Zh/ynQNzMfsnekaWPY7uVYNzMeclukamI9505Ltbjb/B6L9c1kNnJlh5d03khzeO7+19tzW2r+MYzdKcmFHzDuTnF9VD8rw/9z5P1gsyuseSZ5SVf9QVX/c+VySJFV11yRfyrCyaSrmhq21/xjfM/93kkV/fLxMm1i6dFX7ylBE+4y375/k2WvMvWGSs5LcYmbstInHv1zMosfo2c44/ntJHjYVk+HA8ItT+c3F/MjM+NOyZJnaXMyrkxzYLlvet3D54oJ9cI0MK2269luGpYrXHcefmeTIzu3sl+E/DGudnlBJXpPh4HFsby3M/zym6mA+pqcOlv3c16qDuTkv6KmDNbaztA7mYv6lpw4W7IPJOpiL+XZPHSzYzpp1kOE/pacluf747+QxYdnv2MTv2xYxU3WwbDvL6mB+/lrxa8RMHgsWxEweC5Y8/zVrYMF2Jo8FS7YzVQN3TXLT8fZrMvzuPGS8f5uMpzjMxSzcTxM1sEVMhlVQp2f5KTpLfx4ZmiJHd8S8IMlvTNTBfMzRyaWf4Hpwkld1xjxpvL9rFizrX2O/fSTDX1Z7cvt2kjuO9w9L8pLOff2zSd6Y5I1LtvP4JCdm+Gv1izO88dpn/N6yY8HC2p+ogy1iMn0sWPo7liWvCQtifpjp48F8zEnL6m+NmA8vy3ViH0wdD7bILdPHg0XbmXxvMM5/SZKfT+f7xEX5r1UHi2Km6mDZdtaqg/mY9L9HnI3pfY84G9P7HnF+H/S8R5zdTu97xPntTL0u3CJDg/v5GU5F6XlvsDCXif28RcxUHaz1nJfVwYKYnvcH8zFPnPnesuPBfMz/m6qDJftg6ngwH3PBVB0s2c7SOkjyvAyvBQdn+M9777Fgi8dcqwYWxUzVwFo1vKwG5mN6amBBTO+xYDam91gwvw96jgWz2+k9Fsxvp+f/i6eN//5tkuuNt4/MePr8/D7snP+wDGerdMVk+H/KWzL8cewaUzEZXr+uM469L5e9b+rJ7W1JbtmTW4b3VU9N8isZVjPVWj+znXFl0jczvOlNhr+kLnyO419yjkvynNba+T0PPB/T8xgLYo6uqkeN375+hi7wVG73S3JUVZ2W5E5V9caOmLfWcFHTXTKcOvHJjpjPJvnJ8dubMiwxnYpJhr/yLzvVYlHMtZPcYczt7kla53bOyXBe6h8t21YbHJWhu3qPTNRCz8+jM2aqDuZj
9u6og/mYAzNdB/Mxr++og/mYN2e6Dhbtg6k6mI/5TqbrYNF2zsnadfDsJK9prX19vN9zTJj8HeuI+fNMH1PmY9pEHVxufobVJFN5zse8f6oGFsQ8KRM1sOC5vDETNbBgO3tmogaWbOecrF0Dn2qtXTDePjd9NTB5zOyMOTbDm4mzOmO+WFW/PX7v+ll8DJqPeWKSQ2b2yckdMQ/P8AY6SX5xyfObj/n3TNfBFvugqvZOclFr7Vud++DcDKfsJcPy8EV1sGhfn5PkZzK8gV7kzkmOaa19JcMbqdMzXQeTr4OdMVPHgvmYQztei+ZjPpnp48F8TDrqfD7m5xc8v6mY8zN9PNgit0wfDxZt55xMvDeoqmsk+fnW2kfS+T6xI/81Y7bifeZsTO97ktncel+/ZmN6j3ezMb2/G/P7rWc/zs6ZfI+45HHPydp18JIMzfHfzXDM+ZVM10FvLmvFVKbrYD7mBh11MB/z5EzXwXzM73fUwXzMCzJdB4v221QdzMf86ILH6NnOOVlSB621F2VYjfWEDP+J7z0WLH3MNVwasxXHgtmY3mPBbG69x4LZmN5jwWxM77FgNibpOxbMxvT+/s1vZ/7+WnprYM35NaxIf1aSX++Naa19vbX26Az/H7prR8yZrbVvjGPnZli11ZPb9ZP8WGvt3ztze+L4+E/NcD2ttY97a3WaropfGZbhPmu8/cKMF+5aMO/JGZbLnTZ+Ta5IWRDzgkWPMbWdDKegnZ7hoqBbdPuW5bZWfkty+1SGjuKLO2MenaE7enqGc3Nv1vl8XpIFF1ldI+boJJ/JUMAfTLJ753ZemORX19jO0UkeNd5+1fh81qyFXHZK4OV+HhN1MB+z9Oe1RszC7fbkNlEH8zF36KiD+ZjrdNTBFrl11MF8zN076mDRdqbq4PSZn8XXM5wyM3lMWLRv16qD+ZieOlgQM1kHy3LpyW3cxs9M1cCCmMkaWJTLVA0s2M7dpmpgyXamauC4DOef75JhVcXzM154O8N54L+1IGbhflprP8/HJDkowyqbzTXw9I6Ya2U4Vef0JO9M8qO9ua2V34Lt3DTDm7hPZzj165odMbuMc09P8okkd+jJLcNft565Ffvtlkn+McMpbh9bVG9LtvPYJL+9xnZ+PclTxtu/muHaiFOvCQtrf6IO5mOem+nXhC22k+nXhKW/l2vUwXzMfZbV0hoxt1i23YnnM/WasCi3qdeERdtZ83gwxt0nyZ+Mt3vfJ26R/1p1MB+TzteEuZiu14Rl+3aiTme30/W6MBfT9bown9tUHSzYTtfrwoLtTL0unJThGiC7ZLgw7wum6mBZLhP7eT7maVN1sCDm5lN1sNZ+WpbfgpjJOlgQ0/MecYvcpupgQcy9p+pgyXam6mD3DK9n107/sWCLx1yrBuZj0n8smI3pPRYsfL4TNTq7nd5jwWxM77HgcrlN1cCC7fQeC+a30/OacNr47/MzfuhHhgbjvov24Vrzx5/VxzL3Hmki5nVJ7j2OvTfJbTtiTsvwXu7aGd7LTcaMtx+d5Ne35vlnOAaduazuZr82/8d5p1HDRUXPyLBs66AMV3S/aMdmxfZQw0XSjstwmsmnM1yx/vSohaut8a8zh8Qx4Wqlqn4mw+kMleQ9SX47Qw2clWGF34Gttc/vuAzZHmq4kOSfZ7hOzjUzrM56TxwLrnaq6iUZLjJ6oveJV09VdbcMK0dvkeE/wIdHHVztVNULk3y2tfZWx4Krr6o6rbW2f1XdIsOnPH4oQ8PmHq21S2bnTM3P0CR7VJJ/HR/+Ba21/zURs1eGT91rSf6mtfZ7Hdu5d4Ym1PcyrLp+dc9zqaq/TPKK1trm64X1xLwlwyUEzpjclztbMym5tKlwQJLT27C8nasptUCiDrj008UelOG6P4su8MzVgGMBiTpgoA5QA1TVnhmus3RqTzNxa+fvjDGXi98Zm0kAAAAArMbOeAFuAAAAAFZEMwkAAACAbppJAABXkqr6kaq65VbG7Do9
CwBg/dBMAgC48vxKklct+2ZV/XhVHVBVT5wZflNVHbj61AAArhwuwA0AcCWoqo1Jzkny70kuTnLTJBdk+OPdrq21n6+q4zJ8vO+rk7wryYuTvD3Jt5I8qrX2w+2fOQDA1tFMAgDYRlV1zSTvS/Lh1trvj2Mfba3dY2bOxgyNpL9McvMkb0vyxCSfSPLdJIcn+fXW2ve3c/oAAFtFMwkAYBtV1d5JHpfkukluNQ7fM8k/JNmQ5D1JPpPkDhlWLF2c5JIk+yQ5Lcl/Z1jB9KOttbdtx9QBALaaZhIAwJWkqj6c5P6ttR8sWJn0kxkaTrsmeXOSJyc5Kcl1kvxEkk+11j64/bMGANg6LsANALCNqmqXqtolycK/0lXVNZJUkh8m2ZTkPkl2S/KNDKuUdstwmhsAwLq3YUcnAACwEzgyyUOSfDvJu6oqSW5bVSeP398lwyqk+yd5a5IfzdB4ul6SGya5d5KHb+ecAQCuEKe5AQCsQFV9rLV295n7t0zysiR/n+QmGa6d9Ngkxyf5cmvtqB2SKADAVnKaGwDAauw+d/+/MnyS298k+d0k12ytXZLkEUnuWFU32s75AQBcIVYmAQDsYFW1y9hYAgBY9zSTAAAAAOjmNDcAAAAAumkmAQAAANBNMwkAAACAbppJAAAAAHT7/7ayEy+RSs91AAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 1440x576 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "#可视化一下\n",
    "# plt.bar(df_train['age'].value_counts().sort_index().index,df_train['age'].value_counts().sort_index().values)\n",
    "plt.figure(figsize=(20,8))\n",
    "sns.barplot(df_train['age'].value_counts().sort_index().index, df_train['age'].value_counts().sort_index().values)\n",
    "plt.title(\"df_train里面的age分布情况\")\n",
    "plt.xlabel(\"年龄\")\n",
    "plt.ylabel(\"数量\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1         (40.0, 50.0]\n",
       "2         (25.0, 40.0]\n",
       "3         (25.0, 40.0]\n",
       "4         (25.0, 40.0]\n",
       "5         (40.0, 50.0]\n",
       "              ...     \n",
       "149996     (70.0, inf]\n",
       "149997    (40.0, 50.0]\n",
       "149998    (50.0, 60.0]\n",
       "149999    (25.0, 40.0]\n",
       "150000    (60.0, 70.0]\n",
       "Name: bin_age, Length: 150000, dtype: category\n",
       "Categories (6, interval[float64]): [(-inf, 25.0] < (25.0, 40.0] < (40.0, 50.0] < (50.0, 60.0] < (60.0, 70.0] < (70.0, inf]]"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#看到这个年龄的分布之后  开始做分箱\n",
    "age_bins=[-math.inf, 25, 40, 50, 60, 70,math.inf]#这里是分成了6段 0-25 25-40 40-50 50-60 60-70 70-无穷大\n",
    "df_train['bin_age']=pd.cut(df_train['age'],bins=age_bins)#这个操作对年龄段进行分箱\n",
    "df_train['bin_age']#这样就按照分箱的方式分好了  比如第一个年龄是45  就被分到了40-50这一箱  注意观察 这里还是左开右闭的操作"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>bin_age</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>45</td>\n",
       "      <td>(40.0, 50.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>40</td>\n",
       "      <td>(25.0, 40.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>38</td>\n",
       "      <td>(25.0, 40.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>30</td>\n",
       "      <td>(25.0, 40.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>49</td>\n",
       "      <td>(40.0, 50.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149996</th>\n",
       "      <td>74</td>\n",
       "      <td>(70.0, inf]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149997</th>\n",
       "      <td>44</td>\n",
       "      <td>(40.0, 50.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149998</th>\n",
       "      <td>58</td>\n",
       "      <td>(50.0, 60.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149999</th>\n",
       "      <td>30</td>\n",
       "      <td>(25.0, 40.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>150000</th>\n",
       "      <td>64</td>\n",
       "      <td>(60.0, 70.0]</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>150000 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        age       bin_age\n",
       "1        45  (40.0, 50.0]\n",
       "2        40  (25.0, 40.0]\n",
       "3        38  (25.0, 40.0]\n",
       "4        30  (25.0, 40.0]\n",
       "5        49  (40.0, 50.0]\n",
       "...     ...           ...\n",
       "149996   74   (70.0, inf]\n",
       "149997   44  (40.0, 50.0]\n",
       "149998   58  (50.0, 60.0]\n",
       "149999   30  (25.0, 40.0]\n",
       "150000   64  (60.0, 70.0]\n",
       "\n",
       "[150000 rows x 2 columns]"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#其实 还可以把两个都打印出来看看\n",
    "df_train[['age','bin_age']]#这样分布 一目了然  #有了age这个做基础  其他都可以仿照类似的情况去操作了"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.0     90826\n",
       "1.0     26316\n",
       "2.0     19522\n",
       "3.0      9483\n",
       "4.0      2862\n",
       "5.0       746\n",
       "6.0       158\n",
       "7.0        51\n",
       "8.0        24\n",
       "9.0         5\n",
       "10.0        5\n",
       "13.0        1\n",
       "20.0        1\n",
       "Name: NumberOfDependents, dtype: int64"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#对于 NumberOfDependents （家属人数）字段，分成6段 [-math.inf,2,4,6,8,10,math.inf]  在做之前 还是对家属字段进行一下探索\n",
    "df_train['NumberOfDependents'].value_counts()#家属人数为0的最多  还有20个家属的 真是吊 \n",
    "#这个就不用可视化了 直接分箱了"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1         (-inf, 2.0]\n",
       "2         (-inf, 2.0]\n",
       "3         (-inf, 2.0]\n",
       "4         (-inf, 2.0]\n",
       "5         (-inf, 2.0]\n",
       "             ...     \n",
       "149996    (-inf, 2.0]\n",
       "149997    (-inf, 2.0]\n",
       "149998    (-inf, 2.0]\n",
       "149999    (-inf, 2.0]\n",
       "150000    (-inf, 2.0]\n",
       "Name: bin_NumberOfDependents, Length: 150000, dtype: category\n",
       "Categories (6, interval[float64]): [(-inf, 2.0] < (2.0, 4.0] < (4.0, 6.0] < (6.0, 8.0] < (8.0, 10.0] < (10.0, inf]]"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dependent_bins=[-math.inf,2,4,6,8,10,math.inf]#这里是把家庭人数分成了 0-2 2-4 4-6 6-8 8-10 10-无穷大 6组\n",
    "df_train['bin_NumberOfDependents']=pd.cut(df_train['NumberOfDependents'],bins=dependent_bins)#这个操作对家庭人数进行分箱\n",
    "df_train['bin_NumberOfDependents']#这样就按照分箱的方式分好了"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>NumberOfDependents</th>\n",
       "      <th>bin_NumberOfDependents</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2.0</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.0</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.0</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.0</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0.0</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149996</th>\n",
       "      <td>0.0</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149997</th>\n",
       "      <td>2.0</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149998</th>\n",
       "      <td>0.0</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149999</th>\n",
       "      <td>0.0</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>150000</th>\n",
       "      <td>0.0</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>150000 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        NumberOfDependents bin_NumberOfDependents\n",
       "1                      2.0            (-inf, 2.0]\n",
       "2                      1.0            (-inf, 2.0]\n",
       "3                      0.0            (-inf, 2.0]\n",
       "4                      0.0            (-inf, 2.0]\n",
       "5                      0.0            (-inf, 2.0]\n",
       "...                    ...                    ...\n",
       "149996                 0.0            (-inf, 2.0]\n",
       "149997                 2.0            (-inf, 2.0]\n",
       "149998                 0.0            (-inf, 2.0]\n",
       "149999                 0.0            (-inf, 2.0]\n",
       "150000                 0.0            (-inf, 2.0]\n",
       "\n",
       "[150000 rows x 2 columns]"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_train[['NumberOfDependents','bin_NumberOfDependents']]#这样就分好箱了"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0     126018\n",
       "1      16033\n",
       "2       4598\n",
       "3       1754\n",
       "4        747\n",
       "5        342\n",
       "98       264\n",
       "6        140\n",
       "7         54\n",
       "8         25\n",
       "9         12\n",
       "96         5\n",
       "10         4\n",
       "12         2\n",
       "13         1\n",
       "11         1\n",
       "Name: NumberOfTime30-59DaysPastDueNotWorse, dtype: int64"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#对于3种逾期次数，即 NumberOfTime30-59DaysPastDueNotWorse， NumberOfTime60-89DaysPastDueNotWorse， NumberOfTimes90DaysLate，分成10段\n",
    "#[-math.inf,1,2,3,4,5,6,7,8,9,math.inf]\n",
    "#还是先看下数据的分布\n",
    "df_train['NumberOfTime30-59DaysPastDueNotWorse'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0     142396\n",
       "1       5731\n",
       "2       1118\n",
       "3        318\n",
       "98       264\n",
       "4        105\n",
       "5         34\n",
       "6         16\n",
       "7          9\n",
       "96         5\n",
       "8          2\n",
       "11         1\n",
       "9          1\n",
       "Name: NumberOfTime60-89DaysPastDueNotWorse, dtype: int64"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_train['NumberOfTime60-89DaysPastDueNotWorse'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0     141662\n",
       "1       5243\n",
       "2       1555\n",
       "3        667\n",
       "4        291\n",
       "98       264\n",
       "5        131\n",
       "6         80\n",
       "7         38\n",
       "8         21\n",
       "9         19\n",
       "10         8\n",
       "11         5\n",
       "96         5\n",
       "13         4\n",
       "12         2\n",
       "14         2\n",
       "15         2\n",
       "17         1\n",
       "Name: NumberOfTimes90DaysLate, dtype: int64"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_train['NumberOfTimes90DaysLate'].value_counts() #数据大致探索了 还是要分箱吧"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>NumberOfTime30-59DaysPastDueNotWorse</th>\n",
       "      <th>bin_NumberOfTime30-59DaysPastDueNotWorse</th>\n",
       "      <th>NumberOfTime60-89DaysPastDueNotWorse</th>\n",
       "      <th>bin_NumberOfTime60-89DaysPastDueNotWorse</th>\n",
       "      <th>NumberOfTimes90DaysLate</th>\n",
       "      <th>bin_NumberOfTimes90DaysLate</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>(1.0, 2.0]</td>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>1</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>1</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149996</th>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149997</th>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149998</th>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149999</th>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>150000</th>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>0</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>150000 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        NumberOfTime30-59DaysPastDueNotWorse bin_NumberOfTime30-59DaysPastDueNotWorse  NumberOfTime60-89DaysPastDueNotWorse bin_NumberOfTime60-89DaysPastDueNotWorse  NumberOfTimes90DaysLate bin_NumberOfTimes90DaysLate\n",
       "1                                          2                               (1.0, 2.0]                                     0                              (-inf, 1.0]                        0                 (-inf, 1.0]\n",
       "2                                          0                              (-inf, 1.0]                                     0                              (-inf, 1.0]                        0                 (-inf, 1.0]\n",
       "3                                          1                              (-inf, 1.0]                                     0                              (-inf, 1.0]                        1                 (-inf, 1.0]\n",
       "4                                          0                              (-inf, 1.0]                                     0                              (-inf, 1.0]                        0                 (-inf, 1.0]\n",
       "5                                          1                              (-inf, 1.0]                                     0                              (-inf, 1.0]                        0                 (-inf, 1.0]\n",
       "...                                      ...                                      ...                                   ...                                      ...                      ...                         ...\n",
       "149996                                     0                              (-inf, 1.0]                                     0                              (-inf, 1.0]                        0                 (-inf, 1.0]\n",
       "149997                                     0                              (-inf, 1.0]                                     0                              (-inf, 1.0]                        0                 (-inf, 1.0]\n",
       "149998                                     0                              (-inf, 1.0]                                     0                              (-inf, 1.0]                        0                 (-inf, 1.0]\n",
       "149999                                     0                              (-inf, 1.0]                                     0                              (-inf, 1.0]                        0                 (-inf, 1.0]\n",
       "150000                                     0                              (-inf, 1.0]                                     0                              (-inf, 1.0]                        0                 (-inf, 1.0]\n",
       "\n",
       "[150000 rows x 6 columns]"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#3）对于3种逾期次数，即 NumberOfTime30-59DaysPastDueNotWorse，NumberOfTime60-89DaysPastDueNotWorse，NumberOfTimes90DaysLate，分成10段\n",
    "dpd_bins=[-math.inf,1,2,3,4,5,6,7,8,9,math.inf]#这里是分成了 0-1 1-2 2-3 ...8-9 9-正无穷\n",
    "df_train['bin_NumberOfTime30-59DaysPastDueNotWorse']=pd.cut(df_train['NumberOfTime30-59DaysPastDueNotWorse'],bins=dpd_bins)\n",
    "df_train['bin_NumberOfTime60-89DaysPastDueNotWorse']=pd.cut(df_train['NumberOfTime60-89DaysPastDueNotWorse'],bins=dpd_bins)\n",
    "df_train['bin_NumberOfTimes90DaysLate']=pd.cut(df_train['NumberOfTimes90DaysLate'],bins=dpd_bins)\n",
    "\n",
    "#查看分箱情况\n",
    "df_train[['NumberOfTime30-59DaysPastDueNotWorse','bin_NumberOfTime30-59DaysPastDueNotWorse',\\\n",
    "          'NumberOfTime60-89DaysPastDueNotWorse','bin_NumberOfTime60-89DaysPastDueNotWorse',\\\n",
    "          'NumberOfTimes90DaysLate','bin_NumberOfTimes90DaysLate']]#这样就完成了字段的分箱"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>bin_RevolvingUtilizationOfUnsecuredLines</th>\n",
       "      <th>RevolvingUtilizationOfUnsecuredLines</th>\n",
       "      <th>bin_DebtRatio</th>\n",
       "      <th>DebtRatio</th>\n",
       "      <th>bin_MonthlyIncome</th>\n",
       "      <th>MonthlyIncome</th>\n",
       "      <th>bin_NumberOfOpenCreditLinesAndLoans</th>\n",
       "      <th>NumberOfOpenCreditLinesAndLoans</th>\n",
       "      <th>bin_NumberRealEstateLoansOrLines</th>\n",
       "      <th>NumberRealEstateLoansOrLines</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>(0.699, 50708.0]</td>\n",
       "      <td>0.766127</td>\n",
       "      <td>(0.468, 4.0]</td>\n",
       "      <td>0.802982</td>\n",
       "      <td>(8250.0, 3008750.0]</td>\n",
       "      <td>9120.0</td>\n",
       "      <td>(12.0, 58.0]</td>\n",
       "      <td>13</td>\n",
       "      <td>(2.0, 54.0]</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>(0.699, 50708.0]</td>\n",
       "      <td>0.957151</td>\n",
       "      <td>(-0.001, 0.134]</td>\n",
       "      <td>0.121876</td>\n",
       "      <td>(-0.001, 3400.0]</td>\n",
       "      <td>2600.0</td>\n",
       "      <td>(-0.001, 4.0]</td>\n",
       "      <td>4</td>\n",
       "      <td>(-0.001, 1.0]</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>(0.271, 0.699]</td>\n",
       "      <td>0.658180</td>\n",
       "      <td>(-0.001, 0.134]</td>\n",
       "      <td>0.085113</td>\n",
       "      <td>(-0.001, 3400.0]</td>\n",
       "      <td>3042.0</td>\n",
       "      <td>(-0.001, 4.0]</td>\n",
       "      <td>2</td>\n",
       "      <td>(-0.001, 1.0]</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>(0.0832, 0.271]</td>\n",
       "      <td>0.233810</td>\n",
       "      <td>(-0.001, 0.134]</td>\n",
       "      <td>0.036050</td>\n",
       "      <td>(-0.001, 3400.0]</td>\n",
       "      <td>3300.0</td>\n",
       "      <td>(4.0, 6.0]</td>\n",
       "      <td>5</td>\n",
       "      <td>(-0.001, 1.0]</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>(0.699, 50708.0]</td>\n",
       "      <td>0.907239</td>\n",
       "      <td>(-0.001, 0.134]</td>\n",
       "      <td>0.024926</td>\n",
       "      <td>(8250.0, 3008750.0]</td>\n",
       "      <td>63588.0</td>\n",
       "      <td>(6.0, 9.0]</td>\n",
       "      <td>7</td>\n",
       "      <td>(-0.001, 1.0]</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149996</th>\n",
       "      <td>(0.0192, 0.0832]</td>\n",
       "      <td>0.040674</td>\n",
       "      <td>(0.134, 0.287]</td>\n",
       "      <td>0.225131</td>\n",
       "      <td>(-0.001, 3400.0]</td>\n",
       "      <td>2100.0</td>\n",
       "      <td>(-0.001, 4.0]</td>\n",
       "      <td>4</td>\n",
       "      <td>(-0.001, 1.0]</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149997</th>\n",
       "      <td>(0.271, 0.699]</td>\n",
       "      <td>0.299745</td>\n",
       "      <td>(0.468, 4.0]</td>\n",
       "      <td>0.716562</td>\n",
       "      <td>(5400.0, 8250.0]</td>\n",
       "      <td>5584.0</td>\n",
       "      <td>(-0.001, 4.0]</td>\n",
       "      <td>4</td>\n",
       "      <td>(-0.001, 1.0]</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149998</th>\n",
       "      <td>(0.0832, 0.271]</td>\n",
       "      <td>0.246044</td>\n",
       "      <td>(4.0, 329664.0]</td>\n",
       "      <td>3870.000000</td>\n",
       "      <td>(3400.0, 5400.0]</td>\n",
       "      <td>5400.0</td>\n",
       "      <td>(12.0, 58.0]</td>\n",
       "      <td>18</td>\n",
       "      <td>(-0.001, 1.0]</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149999</th>\n",
       "      <td>(-0.001, 0.0192]</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>(-0.001, 0.134]</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>(5400.0, 8250.0]</td>\n",
       "      <td>5716.0</td>\n",
       "      <td>(-0.001, 4.0]</td>\n",
       "      <td>4</td>\n",
       "      <td>(-0.001, 1.0]</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>150000</th>\n",
       "      <td>(0.699, 50708.0]</td>\n",
       "      <td>0.850283</td>\n",
       "      <td>(0.134, 0.287]</td>\n",
       "      <td>0.249908</td>\n",
       "      <td>(5400.0, 8250.0]</td>\n",
       "      <td>8158.0</td>\n",
       "      <td>(6.0, 9.0]</td>\n",
       "      <td>8</td>\n",
       "      <td>(1.0, 2.0]</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>150000 rows × 10 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       bin_RevolvingUtilizationOfUnsecuredLines  RevolvingUtilizationOfUnsecuredLines    bin_DebtRatio    DebtRatio    bin_MonthlyIncome  MonthlyIncome bin_NumberOfOpenCreditLinesAndLoans  NumberOfOpenCreditLinesAndLoans bin_NumberRealEstateLoansOrLines  NumberRealEstateLoansOrLines\n",
       "1                              (0.699, 50708.0]                              0.766127     (0.468, 4.0]     0.802982  (8250.0, 3008750.0]         9120.0                        (12.0, 58.0]                               13                      (2.0, 54.0]                             6\n",
       "2                              (0.699, 50708.0]                              0.957151  (-0.001, 0.134]     0.121876     (-0.001, 3400.0]         2600.0                       (-0.001, 4.0]                                4                    (-0.001, 1.0]                             0\n",
       "3                                (0.271, 0.699]                              0.658180  (-0.001, 0.134]     0.085113     (-0.001, 3400.0]         3042.0                       (-0.001, 4.0]                                2                    (-0.001, 1.0]                             0\n",
       "4                               (0.0832, 0.271]                              0.233810  (-0.001, 0.134]     0.036050     (-0.001, 3400.0]         3300.0                          (4.0, 6.0]                                5                    (-0.001, 1.0]                             0\n",
       "5                              (0.699, 50708.0]                              0.907239  (-0.001, 0.134]     0.024926  (8250.0, 3008750.0]        63588.0                          (6.0, 9.0]                                7                    (-0.001, 1.0]                             1\n",
       "...                                         ...                                   ...              ...          ...                  ...            ...                                 ...                              ...                              ...                           ...\n",
       "149996                         (0.0192, 0.0832]                              0.040674   (0.134, 0.287]     0.225131     (-0.001, 3400.0]         2100.0                       (-0.001, 4.0]                                4                    (-0.001, 1.0]                             1\n",
       "149997                           (0.271, 0.699]                              0.299745     (0.468, 4.0]     0.716562     (5400.0, 8250.0]         5584.0                       (-0.001, 4.0]                                4                    (-0.001, 1.0]                             1\n",
       "149998                          (0.0832, 0.271]                              0.246044  (4.0, 329664.0]  3870.000000     (3400.0, 5400.0]         5400.0                        (12.0, 58.0]                               18                    (-0.001, 1.0]                             1\n",
       "149999                         (-0.001, 0.0192]                              0.000000  (-0.001, 0.134]     0.000000     (5400.0, 8250.0]         5716.0                       (-0.001, 4.0]                                4                    (-0.001, 1.0]                             0\n",
       "150000                         (0.699, 50708.0]                              0.850283   (0.134, 0.287]     0.249908     (5400.0, 8250.0]         8158.0                          (6.0, 9.0]                                8                       (1.0, 2.0]                             2\n",
       "\n",
       "[150000 rows x 10 columns]"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#接下来对其余字段进行一下分箱处理\n",
    "#对于其余字段，即RevolvingUtilizationOfUnsecuredLines, DebtRatio,MonthlyIncome, NumberOfOpenCreditLinesAndLoans,NumberRealEstateLoansOrLines \n",
    "#把他们分成5段 采用pd.qcut()函数 按照频率来分一下  q=5表示分成5段  \n",
    "#duplicates='drop'表示如果数据分布特别不均匀 按照频率分不了5段的时候 合并一下\n",
    "df_train['bin_RevolvingUtilizationOfUnsecuredLines']=pd.qcut(df_train['RevolvingUtilizationOfUnsecuredLines'],q=5,duplicates='drop')\n",
    "df_train['bin_DebtRatio']=pd.qcut(df_train['DebtRatio'],q=5,duplicates='drop')\n",
    "df_train['bin_MonthlyIncome']=pd.qcut(df_train['MonthlyIncome'],q=5,duplicates='drop')\n",
    "df_train['bin_NumberOfOpenCreditLinesAndLoans']=pd.qcut(df_train['NumberOfOpenCreditLinesAndLoans'],q=5,duplicates='drop')\n",
    "df_train['bin_NumberRealEstateLoansOrLines']=pd.qcut(df_train['NumberRealEstateLoansOrLines'],q=5,duplicates='drop')\n",
    "\n",
    "df_train[['bin_RevolvingUtilizationOfUnsecuredLines','RevolvingUtilizationOfUnsecuredLines',\\\n",
    "          'bin_DebtRatio','DebtRatio',\\\n",
    "          'bin_MonthlyIncome','MonthlyIncome',\\\n",
    "          'bin_NumberOfOpenCreditLinesAndLoans','NumberOfOpenCreditLinesAndLoans',\\\n",
    "          'bin_NumberRealEstateLoansOrLines','NumberRealEstateLoansOrLines']]#这样就完成了字段的分箱 分成了5段"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(-0.001, 1.0]    108526\n",
       "(1.0, 2.0]        31522\n",
       "(2.0, 54.0]        9952\n",
       "Name: bin_NumberRealEstateLoansOrLines, dtype: int64"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#哪一个字段来看看\n",
    "df_train['bin_NumberRealEstateLoansOrLines'].value_counts()#这里本来是分成5段 但是 duplicates='drop' 这里就合并了一下 因为数据分布问题\n",
    "#分成了3段 这样是不合理的  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0     56188\n",
       "1     52338\n",
       "2     31522\n",
       "3      6300\n",
       "4      2170\n",
       "5       689\n",
       "6       320\n",
       "7       171\n",
       "8        93\n",
       "9        78\n",
       "10       37\n",
       "11       23\n",
       "12       18\n",
       "13       15\n",
       "14        7\n",
       "15        7\n",
       "16        4\n",
       "17        4\n",
       "25        3\n",
       "18        2\n",
       "19        2\n",
       "20        2\n",
       "23        2\n",
       "32        1\n",
       "21        1\n",
       "26        1\n",
       "29        1\n",
       "54        1\n",
       "Name: NumberRealEstateLoansOrLines, dtype: int64"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_train['NumberRealEstateLoansOrLines'].value_counts()#按照人数来分 3万人一份  这里0和1 都占了4份 然后合并了"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>bin_NumberRealEstateLoansOrLines</th>\n",
       "      <th>NumberRealEstateLoansOrLines</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>(3.0, inf]</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>(-inf, 0.0]</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>(-inf, 0.0]</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>(-inf, 0.0]</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>(0.0, 1.0]</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149996</th>\n",
       "      <td>(0.0, 1.0]</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149997</th>\n",
       "      <td>(0.0, 1.0]</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149998</th>\n",
       "      <td>(0.0, 1.0]</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149999</th>\n",
       "      <td>(-inf, 0.0]</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>150000</th>\n",
       "      <td>(1.0, 2.0]</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>150000 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       bin_NumberRealEstateLoansOrLines  NumberRealEstateLoansOrLines\n",
       "1                            (3.0, inf]                             6\n",
       "2                           (-inf, 0.0]                             0\n",
       "3                           (-inf, 0.0]                             0\n",
       "4                           (-inf, 0.0]                             0\n",
       "5                            (0.0, 1.0]                             1\n",
       "...                                 ...                           ...\n",
       "149996                       (0.0, 1.0]                             1\n",
       "149997                       (0.0, 1.0]                             1\n",
       "149998                       (0.0, 1.0]                             1\n",
       "149999                      (-inf, 0.0]                             0\n",
       "150000                       (1.0, 2.0]                             2\n",
       "\n",
       "[150000 rows x 2 columns]"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "loans_bins=[-math.inf,0,1,2,3,math.inf]#分成了 负无穷-0  0-1 1-2 2-3 3-正无穷 5段\n",
    "df_train['bin_NumberRealEstateLoansOrLines']=pd.cut(df_train['NumberRealEstateLoansOrLines'],bins=loans_bins)#这样重新分一下\n",
    "df_train[['bin_NumberRealEstateLoansOrLines','NumberRealEstateLoansOrLines']]#这样分成了5段貌似更合理一些"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(-inf, 0.0]    56188\n",
       "(0.0, 1.0]     52338\n",
       "(1.0, 2.0]     31522\n",
       "(2.0, 3.0]      6300\n",
       "(3.0, inf]      3652\n",
       "Name: bin_NumberRealEstateLoansOrLines, dtype: int64"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_train['bin_NumberRealEstateLoansOrLines'].value_counts()#这个分起来 好像更合理一点"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SeriousDlqin2yrs</th>\n",
       "      <th>RevolvingUtilizationOfUnsecuredLines</th>\n",
       "      <th>age</th>\n",
       "      <th>NumberOfTime30-59DaysPastDueNotWorse</th>\n",
       "      <th>DebtRatio</th>\n",
       "      <th>MonthlyIncome</th>\n",
       "      <th>NumberOfOpenCreditLinesAndLoans</th>\n",
       "      <th>NumberOfTimes90DaysLate</th>\n",
       "      <th>NumberRealEstateLoansOrLines</th>\n",
       "      <th>NumberOfTime60-89DaysPastDueNotWorse</th>\n",
       "      <th>NumberOfDependents</th>\n",
       "      <th>bin_age</th>\n",
       "      <th>bin_NumberOfDependents</th>\n",
       "      <th>bin_NumberOfTime30-59DaysPastDueNotWorse</th>\n",
       "      <th>bin_NumberOfTime60-89DaysPastDueNotWorse</th>\n",
       "      <th>bin_NumberOfTimes90DaysLate</th>\n",
       "      <th>bin_RevolvingUtilizationOfUnsecuredLines</th>\n",
       "      <th>bin_DebtRatio</th>\n",
       "      <th>bin_MonthlyIncome</th>\n",
       "      <th>bin_NumberOfOpenCreditLinesAndLoans</th>\n",
       "      <th>bin_NumberRealEstateLoansOrLines</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>0.766127</td>\n",
       "      <td>45</td>\n",
       "      <td>2</td>\n",
       "      <td>0.802982</td>\n",
       "      <td>9120.0</td>\n",
       "      <td>13</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>(40.0, 50.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(1.0, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.699, 50708.0]</td>\n",
       "      <td>(0.468, 4.0]</td>\n",
       "      <td>(8250.0, 3008750.0]</td>\n",
       "      <td>(12.0, 58.0]</td>\n",
       "      <td>(3.0, inf]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>0.957151</td>\n",
       "      <td>40</td>\n",
       "      <td>0</td>\n",
       "      <td>0.121876</td>\n",
       "      <td>2600.0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>(25.0, 40.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.699, 50708.0]</td>\n",
       "      <td>(-0.001, 0.134]</td>\n",
       "      <td>(-0.001, 3400.0]</td>\n",
       "      <td>(-0.001, 4.0]</td>\n",
       "      <td>(-inf, 0.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>0.658180</td>\n",
       "      <td>38</td>\n",
       "      <td>1</td>\n",
       "      <td>0.085113</td>\n",
       "      <td>3042.0</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>(25.0, 40.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.271, 0.699]</td>\n",
       "      <td>(-0.001, 0.134]</td>\n",
       "      <td>(-0.001, 3400.0]</td>\n",
       "      <td>(-0.001, 4.0]</td>\n",
       "      <td>(-inf, 0.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>0.233810</td>\n",
       "      <td>30</td>\n",
       "      <td>0</td>\n",
       "      <td>0.036050</td>\n",
       "      <td>3300.0</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>(25.0, 40.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.0832, 0.271]</td>\n",
       "      <td>(-0.001, 0.134]</td>\n",
       "      <td>(-0.001, 3400.0]</td>\n",
       "      <td>(4.0, 6.0]</td>\n",
       "      <td>(-inf, 0.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0</td>\n",
       "      <td>0.907239</td>\n",
       "      <td>49</td>\n",
       "      <td>1</td>\n",
       "      <td>0.024926</td>\n",
       "      <td>63588.0</td>\n",
       "      <td>7</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>(40.0, 50.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.699, 50708.0]</td>\n",
       "      <td>(-0.001, 0.134]</td>\n",
       "      <td>(8250.0, 3008750.0]</td>\n",
       "      <td>(6.0, 9.0]</td>\n",
       "      <td>(0.0, 1.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149996</th>\n",
       "      <td>0</td>\n",
       "      <td>0.040674</td>\n",
       "      <td>74</td>\n",
       "      <td>0</td>\n",
       "      <td>0.225131</td>\n",
       "      <td>2100.0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>(70.0, inf]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.0192, 0.0832]</td>\n",
       "      <td>(0.134, 0.287]</td>\n",
       "      <td>(-0.001, 3400.0]</td>\n",
       "      <td>(-0.001, 4.0]</td>\n",
       "      <td>(0.0, 1.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149997</th>\n",
       "      <td>0</td>\n",
       "      <td>0.299745</td>\n",
       "      <td>44</td>\n",
       "      <td>0</td>\n",
       "      <td>0.716562</td>\n",
       "      <td>5584.0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>(40.0, 50.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.271, 0.699]</td>\n",
       "      <td>(0.468, 4.0]</td>\n",
       "      <td>(5400.0, 8250.0]</td>\n",
       "      <td>(-0.001, 4.0]</td>\n",
       "      <td>(0.0, 1.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149998</th>\n",
       "      <td>0</td>\n",
       "      <td>0.246044</td>\n",
       "      <td>58</td>\n",
       "      <td>0</td>\n",
       "      <td>3870.000000</td>\n",
       "      <td>5400.0</td>\n",
       "      <td>18</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>(50.0, 60.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.0832, 0.271]</td>\n",
       "      <td>(4.0, 329664.0]</td>\n",
       "      <td>(3400.0, 5400.0]</td>\n",
       "      <td>(12.0, 58.0]</td>\n",
       "      <td>(0.0, 1.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149999</th>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>30</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>5716.0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>(25.0, 40.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-0.001, 0.0192]</td>\n",
       "      <td>(-0.001, 0.134]</td>\n",
       "      <td>(5400.0, 8250.0]</td>\n",
       "      <td>(-0.001, 4.0]</td>\n",
       "      <td>(-inf, 0.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>150000</th>\n",
       "      <td>0</td>\n",
       "      <td>0.850283</td>\n",
       "      <td>64</td>\n",
       "      <td>0</td>\n",
       "      <td>0.249908</td>\n",
       "      <td>8158.0</td>\n",
       "      <td>8</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>(60.0, 70.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.699, 50708.0]</td>\n",
       "      <td>(0.134, 0.287]</td>\n",
       "      <td>(5400.0, 8250.0]</td>\n",
       "      <td>(6.0, 9.0]</td>\n",
       "      <td>(1.0, 2.0]</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>150000 rows × 21 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        SeriousDlqin2yrs  RevolvingUtilizationOfUnsecuredLines  age  NumberOfTime30-59DaysPastDueNotWorse    DebtRatio  MonthlyIncome  NumberOfOpenCreditLinesAndLoans  NumberOfTimes90DaysLate  NumberRealEstateLoansOrLines  NumberOfTime60-89DaysPastDueNotWorse  NumberOfDependents       bin_age bin_NumberOfDependents bin_NumberOfTime30-59DaysPastDueNotWorse bin_NumberOfTime60-89DaysPastDueNotWorse bin_NumberOfTimes90DaysLate bin_RevolvingUtilizationOfUnsecuredLines    bin_DebtRatio    bin_MonthlyIncome bin_NumberOfOpenCreditLinesAndLoans bin_NumberRealEstateLoansOrLines\n",
       "1                      1                              0.766127   45                                     2     0.802982         9120.0                               13                        0                             6                                     0                 2.0  (40.0, 50.0]            (-inf, 2.0]                               (1.0, 2.0]                              (-inf, 1.0]                 (-inf, 1.0]                         (0.699, 50708.0]     (0.468, 4.0]  (8250.0, 3008750.0]                        (12.0, 58.0]                       (3.0, inf]\n",
       "2                      0                              0.957151   40                                     0     0.121876         2600.0                                4                        0                             0                                     0                 1.0  (25.0, 40.0]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                         (0.699, 50708.0]  (-0.001, 0.134]     (-0.001, 3400.0]                       (-0.001, 4.0]                      (-inf, 0.0]\n",
       "3                      0                              0.658180   38                                     1     0.085113         3042.0                                2                        1                             0                                     0                 0.0  (25.0, 40.0]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                           (0.271, 0.699]  (-0.001, 0.134]     (-0.001, 3400.0]                       (-0.001, 4.0]                      (-inf, 0.0]\n",
       "4                      0                              0.233810   30                                     0     0.036050         3300.0                                5                        0                             0                                     0                 0.0  (25.0, 40.0]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                          (0.0832, 0.271]  (-0.001, 0.134]     (-0.001, 3400.0]                          (4.0, 6.0]                      (-inf, 0.0]\n",
       "5                      0                              0.907239   49                                     1     0.024926        63588.0                                7                        0                             1                                     0                 0.0  (40.0, 50.0]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                         (0.699, 50708.0]  (-0.001, 0.134]  (8250.0, 3008750.0]                          (6.0, 9.0]                       (0.0, 1.0]\n",
       "...                  ...                                   ...  ...                                   ...          ...            ...                              ...                      ...                           ...                                   ...                 ...           ...                    ...                                      ...                                      ...                         ...                                      ...              ...                  ...                                 ...                              ...\n",
       "149996                 0                              0.040674   74                                     0     0.225131         2100.0                                4                        0                             1                                     0                 0.0   (70.0, inf]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                         (0.0192, 0.0832]   (0.134, 0.287]     (-0.001, 3400.0]                       (-0.001, 4.0]                       (0.0, 1.0]\n",
       "149997                 0                              0.299745   44                                     0     0.716562         5584.0                                4                        0                             1                                     0                 2.0  (40.0, 50.0]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                           (0.271, 0.699]     (0.468, 4.0]     (5400.0, 8250.0]                       (-0.001, 4.0]                       (0.0, 1.0]\n",
       "149998                 0                              0.246044   58                                     0  3870.000000         5400.0                               18                        0                             1                                     0                 0.0  (50.0, 60.0]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                          (0.0832, 0.271]  (4.0, 329664.0]     (3400.0, 5400.0]                        (12.0, 58.0]                       (0.0, 1.0]\n",
       "149999                 0                              0.000000   30                                     0     0.000000         5716.0                                4                        0                             0                                     0                 0.0  (25.0, 40.0]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                         (-0.001, 0.0192]  (-0.001, 0.134]     (5400.0, 8250.0]                       (-0.001, 4.0]                      (-inf, 0.0]\n",
       "150000                 0                              0.850283   64                                     0     0.249908         8158.0                                8                        0                             2                                     0                 0.0  (60.0, 70.0]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                         (0.699, 50708.0]   (0.134, 0.287]     (5400.0, 8250.0]                          (6.0, 9.0]                       (1.0, 2.0]\n",
       "\n",
       "[150000 rows x 21 columns]"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.set_option('display.max_columns', 10000)\n",
    "# Take another look at df_train now that binning is finished\n",
    "df_train  # originally 11 columns; every column except the prediction target received a bin_* companion, giving 21 columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['SeriousDlqin2yrs', 'RevolvingUtilizationOfUnsecuredLines', 'age',\n",
       "       'NumberOfTime30-59DaysPastDueNotWorse', 'DebtRatio',\n",
       "       'MonthlyIncome', 'NumberOfOpenCreditLinesAndLoans',\n",
       "       'NumberOfTimes90DaysLate', 'NumberRealEstateLoansOrLines',\n",
       "       'NumberOfTime60-89DaysPastDueNotWorse', 'NumberOfDependents',\n",
       "       'bin_age', 'bin_NumberOfDependents',\n",
       "       'bin_NumberOfTime30-59DaysPastDueNotWorse',\n",
       "       'bin_NumberOfTime60-89DaysPastDueNotWorse',\n",
       "       'bin_NumberOfTimes90DaysLate',\n",
       "       'bin_RevolvingUtilizationOfUnsecuredLines', 'bin_DebtRatio',\n",
       "       'bin_MonthlyIncome', 'bin_NumberOfOpenCreditLinesAndLoans',\n",
       "       'bin_NumberRealEstateLoansOrLines'], dtype=object)"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_train.columns.values  # SeriousDlqin2yrs is the prediction target, so it has no bin_* column; every other column was binned"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "10\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "['bin_age',\n",
       " 'bin_NumberOfDependents',\n",
       " 'bin_NumberOfTime30-59DaysPastDueNotWorse',\n",
       " 'bin_NumberOfTime60-89DaysPastDueNotWorse',\n",
       " 'bin_NumberOfTimes90DaysLate',\n",
       " 'bin_RevolvingUtilizationOfUnsecuredLines',\n",
       " 'bin_DebtRatio',\n",
       " 'bin_MonthlyIncome',\n",
       " 'bin_NumberOfOpenCreditLinesAndLoans',\n",
       " 'bin_NumberRealEstateLoansOrLines']"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Gather the names of all derived bin_* columns\n",
    "bin_cols=[name for name in df_train.columns if name.startswith('bin_')]\n",
    "print(len(bin_cols))\n",
    "bin_cols  # ten binned columns, as expected"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 3计算WOE与IV值"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1         (40.0, 50.0]\n",
       "2         (25.0, 40.0]\n",
       "3         (25.0, 40.0]\n",
       "4         (25.0, 40.0]\n",
       "5         (40.0, 50.0]\n",
       "              ...     \n",
       "149996     (70.0, inf]\n",
       "149997    (40.0, 50.0]\n",
       "149998    (50.0, 60.0]\n",
       "149999    (25.0, 40.0]\n",
       "150000    (60.0, 70.0]\n",
       "Name: bin_age, Length: 150000, dtype: category\n",
       "Categories (6, interval[float64]): [(-inf, 25.0] < (25.0, 40.0] < (40.0, 50.0] < (50.0, 60.0] < (60.0, 70.0] < (70.0, inf]]"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# The next few cells walk step by step through what the function defined further down does\n",
    "df_train['bin_age']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "6"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_train['bin_age'].nunique()  # nunique = number of unique values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[(40.0, 50.0], (25.0, 40.0], (70.0, inf], (50.0, 60.0], (60.0, 70.0], (-inf, 25.0]]\n",
       "Categories (6, interval[float64]): [(-inf, 25.0] < (25.0, 40.0] < (40.0, 50.0] < (50.0, 60.0] < (60.0, 70.0] < (70.0, inf]]"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_train['bin_age'].unique()  # the six distinct bins shown below"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Interval(40.0, 50.0, closed='right')"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "list(df_train['bin_age'].unique())[0]  # first entry of unique(); a single bin is an Interval object"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0\n",
      "(40.0, 50.0]\n",
      "1\n",
      "(25.0, 40.0]\n",
      "2\n",
      "(70.0, inf]\n",
      "3\n",
      "(50.0, 60.0]\n",
      "4\n",
      "(60.0, 70.0]\n",
      "5\n",
      "(-inf, 25.0]\n"
     ]
    }
   ],
   "source": [
    "# Walk through every bin of bin_age, printing its position and then the bin itself\n",
    "for i,age_bin in enumerate(list(df_train['bin_age'].unique())[:df_train['bin_age'].nunique()]):\n",
    "    print(i,age_bin,sep='\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  Variable         Value    All   Bad\n",
      "0  bin_age  (40.0, 50.0]  35037  2893\n",
      "1  bin_age  (25.0, 40.0]  32069  3296\n",
      "2  bin_age   (70.0, inf]  17636   398\n",
      "3  bin_age  (50.0, 60.0]  34806  2149\n",
      "4  bin_age  (60.0, 70.0]  27424   952\n",
      "5  bin_age  (-inf, 25.0]   3028   338\n"
     ]
    }
   ],
   "source": [
    "# Step 1 of the IV calculation: count all rows and bad rows per bin\n",
    "def cal_IV(df, feature, target):\n",
    "    \"\"\"Print, for each bin of ``feature``, its row count and its bad-row count.\n",
    "\n",
    "    This is the first stage of the Information Value calculation: it only\n",
    "    builds and prints the count table and returns None.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    df : pandas.DataFrame\n",
    "        Frame holding both columns.\n",
    "    feature : str\n",
    "        Column whose distinct values are the bins (e.g. 'bin_age').\n",
    "    target : str\n",
    "        Label column; rows where it equals 1 are counted as 'Bad'.\n",
    "    \"\"\"\n",
    "    cols = ['Variable', 'Value', 'All', 'Bad']  # variable name, bin, total rows, bad rows\n",
    "    values = df[feature]\n",
    "    is_bad = df[target] == 1\n",
    "    lst = []\n",
    "    # Visit the non-missing distinct values only. nunique() skips NaN while\n",
    "    # unique() keeps it, so indexing unique() with range(nunique()) could\n",
    "    # visit NaN and miss a real bin.\n",
    "    for val in list(values.dropna().unique()):\n",
    "        in_bin = values == val\n",
    "        # All = rows in this bin, Bad = rows in this bin whose target is 1\n",
    "        lst.append([feature, val, int(in_bin.sum()), int((in_bin & is_bad).sum())])\n",
    "    # Assemble the per-bin counts into a DataFrame and show it\n",
    "    data = pd.DataFrame(lst, columns=cols)\n",
    "    print(data)\n",
    "# Try the count table on the age bins\n",
    "cal_IV(df_train,'bin_age','SeriousDlqin2yrs')   "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  Variable         Value    All   Bad\n",
      "0  bin_age  (40.0, 50.0]  35037  2893\n",
      "1  bin_age  (25.0, 40.0]  32069  3296\n",
      "2  bin_age   (70.0, inf]  17636   398\n",
      "3  bin_age  (50.0, 60.0]  34806  2149\n",
      "4  bin_age  (60.0, 70.0]  27424   952\n",
      "5  bin_age  (-inf, 25.0]   3028   338\n",
      "新生成的data:\n",
      "   Variable         Value    All   Bad  Margin Bad  Margin Good       woe        iv    iv_sum\n",
      "0  bin_age  (40.0, 50.0]  35037  2893    0.288550     0.229643  0.228343  0.013451  0.240411\n",
      "1  bin_age  (25.0, 40.0]  32069  3296    0.328745     0.205560  0.469547  0.057841  0.240411\n",
      "2  bin_age   (70.0, inf]  17636   398    0.039697     0.123151 -1.132145  0.094483  0.240411\n",
      "3  bin_age  (50.0, 60.0]  34806  2149    0.214343     0.233308 -0.084782  0.001608  0.240411\n",
      "4  bin_age  (60.0, 70.0]  27424   952    0.094953     0.189121 -0.689003  0.064882  0.240411\n",
      "5  bin_age  (-inf, 25.0]   3028   338    0.033712     0.019218  0.562024  0.008146  0.240411\n",
      "排序之后的data：\n",
      "   Variable         Value    All   Bad  Margin Bad  Margin Good       woe        iv    iv_sum\n",
      "5  bin_age  (-inf, 25.0]   3028   338    0.033712     0.019218  0.562024  0.008146  0.240411\n",
      "1  bin_age  (25.0, 40.0]  32069  3296    0.328745     0.205560  0.469547  0.057841  0.240411\n",
      "0  bin_age  (40.0, 50.0]  35037  2893    0.288550     0.229643  0.228343  0.013451  0.240411\n",
      "3  bin_age  (50.0, 60.0]  34806  2149    0.214343     0.233308 -0.084782  0.001608  0.240411\n",
      "4  bin_age  (60.0, 70.0]  27424   952    0.094953     0.189121 -0.689003  0.064882  0.240411\n",
      "2  bin_age   (70.0, inf]  17636   398    0.039697     0.123151 -1.132145  0.094483  0.240411\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "0.24041120302785982"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Compute the Information Value (IV) of a binned feature to gauge its predictive power (verbose version)\n",
    "def cal_IV(df, feature, target):\n",
    "    \"\"\"Compute the Information Value of ``feature`` and print every intermediate table.\n",
    "\n",
    "    For each distinct non-missing value (bin) of ``df[feature]`` the rows are\n",
    "    counted in total ('All') and where ``df[target] == 1`` ('Bad'). Bins with\n",
    "    no bad row are dropped, then per bin:\n",
    "\n",
    "        Margin Bad  = Bad / sum(Bad)\n",
    "        Margin Good = (All - Bad) / sum(All - Bad)\n",
    "        woe         = ln(Margin Bad / Margin Good)\n",
    "        iv          = (Margin Bad - Margin Good) * woe\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    df : pandas.DataFrame\n",
    "        Frame holding both columns.\n",
    "    feature : str\n",
    "        Column whose distinct values are the bins (e.g. 'bin_age').\n",
    "    target : str\n",
    "        Label column; rows where it equals 1 are counted as 'Bad'.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    float\n",
    "        Sum of the per-bin iv values; 0.0 when no bin contains a bad row.\n",
    "    \"\"\"\n",
    "    cols = ['Variable', 'Value', 'All', 'Bad']  # variable name, bin, total rows, bad rows\n",
    "    values = df[feature]\n",
    "    is_bad = df[target] == 1\n",
    "    lst = []\n",
    "    # Visit the non-missing distinct values only. nunique() skips NaN while\n",
    "    # unique() keeps it, so indexing unique() with range(nunique()) could\n",
    "    # visit NaN and miss a real bin.\n",
    "    for val in list(values.dropna().unique()):\n",
    "        in_bin = values == val\n",
    "        lst.append([feature, val, int(in_bin.sum()), int((in_bin & is_bad).sum())])\n",
    "    data = pd.DataFrame(lst, columns=cols)\n",
    "    print(data)\n",
    "    # The WOE / IV formulas start here. Bins without any bad row are removed so\n",
    "    # the logarithm never sees 0; .copy() keeps the column assignments below\n",
    "    # from writing into a slice of the unfiltered table.\n",
    "    data = data[data['Bad'] > 0].copy()\n",
    "    data['Margin Bad'] = data['Bad'] / data['Bad'].sum()  # share of all bad rows that fall in this bin\n",
    "    good = data['All'] - data['Bad']\n",
    "    data['Margin Good'] = good / good.sum()  # share of all good rows that fall in this bin\n",
    "    data['woe'] = np.log(data['Margin Bad'] / data['Margin Good'])\n",
    "    data['iv'] = (data['Margin Bad'] - data['Margin Good']) * data['woe']\n",
    "    # Total taken before sorting so the summation order matches the table printed first\n",
    "    iv_sum = data['iv'].sum()\n",
    "    data['iv_sum'] = iv_sum\n",
    "    print(\"新生成的data:\\n\",data)\n",
    "    data = data.sort_values(by=['Variable', 'Value'])\n",
    "    print(\"排序之后的data：\\n\",data)\n",
    "    return float(iv_sum)\n",
    "\n",
    "# Run the verbose version on the age bins; the cell result is the value it returns\n",
    "cal_IV(df_train,'bin_age','SeriousDlqin2yrs')   "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "bin_age 0.24041120302785982\n",
      "bin_NumberOfDependents 0.01450836007644442\n",
      "bin_NumberOfTime30-59DaysPastDueNotWorse 0.492444774570198\n",
      "bin_NumberOfTime60-89DaysPastDueNotWorse 0.2665587583516951\n",
      "bin_NumberOfTimes90DaysLate 0.49160685733515563\n",
      "bin_RevolvingUtilizationOfUnsecuredLines 1.0596188771423887\n",
      "bin_DebtRatio 0.05948761145809681\n",
      "bin_MonthlyIncome 0.05623446147714756\n",
      "bin_NumberOfOpenCreditLinesAndLoans 0.04802315528985505\n",
      "bin_NumberRealEstateLoansOrLines 0.06167337290177645\n"
     ]
    }
   ],
   "source": [
    "# Compute the Information Value (IV) of a binned feature to gauge its predictive power\n",
    "def cal_IV(df, feature, target):\n",
    "    \"\"\"Return the Information Value (IV) of a binned feature.\n",
    "\n",
    "    For each distinct non-missing value (bin) of ``df[feature]`` the rows are\n",
    "    counted in total ('All') and where ``df[target] == 1`` ('Bad'). Bins with\n",
    "    no bad row are dropped, then per bin:\n",
    "\n",
    "        Margin Bad  = Bad / sum(Bad)\n",
    "        Margin Good = (All - Bad) / sum(All - Bad)\n",
    "        woe         = ln(Margin Bad / Margin Good)\n",
    "        iv          = (Margin Bad - Margin Good) * woe\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    df : pandas.DataFrame\n",
    "        Frame holding both columns.\n",
    "    feature : str\n",
    "        Column whose distinct values are the bins (e.g. 'bin_age').\n",
    "    target : str\n",
    "        Label column; rows where it equals 1 are counted as 'Bad'.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    float\n",
    "        Sum of the per-bin iv values; 0.0 when no bin contains a bad row.\n",
    "    \"\"\"\n",
    "    cols = ['Variable', 'Value', 'All', 'Bad']  # variable name, bin, total rows, bad rows\n",
    "    values = df[feature]\n",
    "    is_bad = df[target] == 1\n",
    "    lst = []\n",
    "    # Visit the non-missing distinct values only. nunique() skips NaN while\n",
    "    # unique() keeps it, so indexing unique() with range(nunique()) could\n",
    "    # visit NaN and miss a real bin.\n",
    "    for val in list(values.dropna().unique()):\n",
    "        in_bin = values == val\n",
    "        lst.append([feature, val, int(in_bin.sum()), int((in_bin & is_bad).sum())])\n",
    "    data = pd.DataFrame(lst, columns=cols)\n",
    "    # Bins without any bad row are removed so the logarithm never sees 0;\n",
    "    # .copy() keeps the column assignments below from writing into a slice.\n",
    "    data = data[data['Bad'] > 0].copy()\n",
    "    data['Margin Bad'] = data['Bad'] / data['Bad'].sum()  # share of all bad rows that fall in this bin\n",
    "    good = data['All'] - data['Bad']\n",
    "    data['Margin Good'] = good / good.sum()  # share of all good rows that fall in this bin\n",
    "    data['woe'] = np.log(data['Margin Bad'] / data['Margin Good'])\n",
    "    data['iv'] = (data['Margin Bad'] - data['Margin Good']) * data['woe']\n",
    "    # Summing directly also covers the case where every bin was filtered out\n",
    "    return float(data['iv'].sum())\n",
    "# Compute and print the IV of every binned column\n",
    "for f  in bin_cols:\n",
    "    print(f,cal_IV(df_train,f,'SeriousDlqin2yrs'))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 只选择IV>0.1的字段，入选的字段有5个\n",
    "* bin_NumberOfTime30-59DaysPastDueNotWorse\n",
    "* bin_NumberOfTime60-89DaysPastDueNotWorse\n",
    "* bin_NumberOfTimes90DaysLate\n",
    "* bin_RevolvingUtilizationOfUnsecuredLines\n",
    "* bin_age"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 4根据计算结果再探索一下数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['NumberOfTime30-59DaysPastDueNotWorse',\n",
       " 'NumberOfTime60-89DaysPastDueNotWorse',\n",
       " 'NumberOfTimes90DaysLate',\n",
       " 'RevolvingUtilizationOfUnsecuredLines',\n",
       " 'age']"
      ]
     },
     "execution_count": 76,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "feature_cols=['NumberOfTime30-59DaysPastDueNotWorse','NumberOfTime60-89DaysPastDueNotWorse',\n",
    "              'NumberOfTimes90DaysLate','RevolvingUtilizationOfUnsecuredLines','age']\n",
    "feature_cols  # the five selected fields, kept together as a list named feature_cols"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(0.699, 50708.0]    30000\n",
       "(0.271, 0.699]      30000\n",
       "(0.0832, 0.271]     30000\n",
       "(0.0192, 0.0832]    30000\n",
       "(-0.001, 0.0192]    30000\n",
       "Name: bin_RevolvingUtilizationOfUnsecuredLines, dtype: int64"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#下面这几句代码是对RevolvingUtilizationOfUnsecuredLines这个数据的探索 因为bin_RevolvingUtilizationOfUnsecuredLines= 1.0596188771423887  \n",
    "#大于0.5的就有点过分了 需要再看看咋回事儿\n",
    "#这个字段的意思是：除房地产和汽车贷款等无分期付款债务外， 信用卡和个人信用额度的总余额除以信贷限额\n",
    "df_train['bin_RevolvingUtilizationOfUnsecuredLines'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    150000.000000\n",
       "mean          6.048438\n",
       "std         249.755371\n",
       "min           0.000000\n",
       "25%           0.029867\n",
       "50%           0.154181\n",
       "75%           0.559046\n",
       "max       50708.000000\n",
       "Name: RevolvingUtilizationOfUnsecuredLines, dtype: float64"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_train['RevolvingUtilizationOfUnsecuredLines'].describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3321"
      ]
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(df_train[df_train['RevolvingUtilizationOfUnsecuredLines']>1])#这是一个比例 一般是小于1的 但是这个地方大于1的还有3000多人 \n",
    "#而且最大值是50708  说明这个标签有一定的泄漏的意思   "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>RevolvingUtilizationOfUnsecuredLines</th>\n",
       "      <th>bin_RevolvingUtilizationOfUnsecuredLines</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.766127</td>\n",
       "      <td>(0.699, 50708.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.957151</td>\n",
       "      <td>(0.699, 50708.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.658180</td>\n",
       "      <td>(0.271, 0.699]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.233810</td>\n",
       "      <td>(0.0832, 0.271]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0.907239</td>\n",
       "      <td>(0.699, 50708.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149996</th>\n",
       "      <td>0.040674</td>\n",
       "      <td>(0.0192, 0.0832]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149997</th>\n",
       "      <td>0.299745</td>\n",
       "      <td>(0.271, 0.699]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149998</th>\n",
       "      <td>0.246044</td>\n",
       "      <td>(0.0832, 0.271]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149999</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>(-0.001, 0.0192]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>150000</th>\n",
       "      <td>0.850283</td>\n",
       "      <td>(0.699, 50708.0]</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>150000 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        RevolvingUtilizationOfUnsecuredLines bin_RevolvingUtilizationOfUnsecuredLines\n",
       "1                                   0.766127                         (0.699, 50708.0]\n",
       "2                                   0.957151                         (0.699, 50708.0]\n",
       "3                                   0.658180                           (0.271, 0.699]\n",
       "4                                   0.233810                          (0.0832, 0.271]\n",
       "5                                   0.907239                         (0.699, 50708.0]\n",
       "...                                      ...                                      ...\n",
       "149996                              0.040674                         (0.0192, 0.0832]\n",
       "149997                              0.299745                           (0.271, 0.699]\n",
       "149998                              0.246044                          (0.0832, 0.271]\n",
       "149999                              0.000000                         (-0.001, 0.0192]\n",
       "150000                              0.850283                         (0.699, 50708.0]\n",
       "\n",
       "[150000 rows x 2 columns]"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_train[['RevolvingUtilizationOfUnsecuredLines','bin_RevolvingUtilizationOfUnsecuredLines']]#先看看分布情况\n",
    "#这个数字分段是一个比例"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 5WOE编码"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SeriousDlqin2yrs</th>\n",
       "      <th>RevolvingUtilizationOfUnsecuredLines</th>\n",
       "      <th>age</th>\n",
       "      <th>NumberOfTime30-59DaysPastDueNotWorse</th>\n",
       "      <th>DebtRatio</th>\n",
       "      <th>MonthlyIncome</th>\n",
       "      <th>NumberOfOpenCreditLinesAndLoans</th>\n",
       "      <th>NumberOfTimes90DaysLate</th>\n",
       "      <th>NumberRealEstateLoansOrLines</th>\n",
       "      <th>NumberOfTime60-89DaysPastDueNotWorse</th>\n",
       "      <th>NumberOfDependents</th>\n",
       "      <th>bin_age</th>\n",
       "      <th>bin_NumberOfDependents</th>\n",
       "      <th>bin_NumberOfTime30-59DaysPastDueNotWorse</th>\n",
       "      <th>bin_NumberOfTime60-89DaysPastDueNotWorse</th>\n",
       "      <th>bin_NumberOfTimes90DaysLate</th>\n",
       "      <th>bin_RevolvingUtilizationOfUnsecuredLines</th>\n",
       "      <th>bin_DebtRatio</th>\n",
       "      <th>bin_MonthlyIncome</th>\n",
       "      <th>bin_NumberOfOpenCreditLinesAndLoans</th>\n",
       "      <th>bin_NumberRealEstateLoansOrLines</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>0.766127</td>\n",
       "      <td>45</td>\n",
       "      <td>2</td>\n",
       "      <td>0.802982</td>\n",
       "      <td>9120.0</td>\n",
       "      <td>13</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>(40.0, 50.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(1.0, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.699, 50708.0]</td>\n",
       "      <td>(0.468, 4.0]</td>\n",
       "      <td>(8250.0, 3008750.0]</td>\n",
       "      <td>(12.0, 58.0]</td>\n",
       "      <td>(3.0, inf]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>0.957151</td>\n",
       "      <td>40</td>\n",
       "      <td>0</td>\n",
       "      <td>0.121876</td>\n",
       "      <td>2600.0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>(25.0, 40.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.699, 50708.0]</td>\n",
       "      <td>(-0.001, 0.134]</td>\n",
       "      <td>(-0.001, 3400.0]</td>\n",
       "      <td>(-0.001, 4.0]</td>\n",
       "      <td>(-inf, 0.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>0.658180</td>\n",
       "      <td>38</td>\n",
       "      <td>1</td>\n",
       "      <td>0.085113</td>\n",
       "      <td>3042.0</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>(25.0, 40.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.271, 0.699]</td>\n",
       "      <td>(-0.001, 0.134]</td>\n",
       "      <td>(-0.001, 3400.0]</td>\n",
       "      <td>(-0.001, 4.0]</td>\n",
       "      <td>(-inf, 0.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>0.233810</td>\n",
       "      <td>30</td>\n",
       "      <td>0</td>\n",
       "      <td>0.036050</td>\n",
       "      <td>3300.0</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>(25.0, 40.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.0832, 0.271]</td>\n",
       "      <td>(-0.001, 0.134]</td>\n",
       "      <td>(-0.001, 3400.0]</td>\n",
       "      <td>(4.0, 6.0]</td>\n",
       "      <td>(-inf, 0.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0</td>\n",
       "      <td>0.907239</td>\n",
       "      <td>49</td>\n",
       "      <td>1</td>\n",
       "      <td>0.024926</td>\n",
       "      <td>63588.0</td>\n",
       "      <td>7</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>(40.0, 50.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.699, 50708.0]</td>\n",
       "      <td>(-0.001, 0.134]</td>\n",
       "      <td>(8250.0, 3008750.0]</td>\n",
       "      <td>(6.0, 9.0]</td>\n",
       "      <td>(0.0, 1.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149996</th>\n",
       "      <td>0</td>\n",
       "      <td>0.040674</td>\n",
       "      <td>74</td>\n",
       "      <td>0</td>\n",
       "      <td>0.225131</td>\n",
       "      <td>2100.0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>(70.0, inf]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.0192, 0.0832]</td>\n",
       "      <td>(0.134, 0.287]</td>\n",
       "      <td>(-0.001, 3400.0]</td>\n",
       "      <td>(-0.001, 4.0]</td>\n",
       "      <td>(0.0, 1.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149997</th>\n",
       "      <td>0</td>\n",
       "      <td>0.299745</td>\n",
       "      <td>44</td>\n",
       "      <td>0</td>\n",
       "      <td>0.716562</td>\n",
       "      <td>5584.0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>(40.0, 50.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.271, 0.699]</td>\n",
       "      <td>(0.468, 4.0]</td>\n",
       "      <td>(5400.0, 8250.0]</td>\n",
       "      <td>(-0.001, 4.0]</td>\n",
       "      <td>(0.0, 1.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149998</th>\n",
       "      <td>0</td>\n",
       "      <td>0.246044</td>\n",
       "      <td>58</td>\n",
       "      <td>0</td>\n",
       "      <td>3870.000000</td>\n",
       "      <td>5400.0</td>\n",
       "      <td>18</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>(50.0, 60.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.0832, 0.271]</td>\n",
       "      <td>(4.0, 329664.0]</td>\n",
       "      <td>(3400.0, 5400.0]</td>\n",
       "      <td>(12.0, 58.0]</td>\n",
       "      <td>(0.0, 1.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149999</th>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>30</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>5716.0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>(25.0, 40.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-0.001, 0.0192]</td>\n",
       "      <td>(-0.001, 0.134]</td>\n",
       "      <td>(5400.0, 8250.0]</td>\n",
       "      <td>(-0.001, 4.0]</td>\n",
       "      <td>(-inf, 0.0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>150000</th>\n",
       "      <td>0</td>\n",
       "      <td>0.850283</td>\n",
       "      <td>64</td>\n",
       "      <td>0</td>\n",
       "      <td>0.249908</td>\n",
       "      <td>8158.0</td>\n",
       "      <td>8</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>(60.0, 70.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.699, 50708.0]</td>\n",
       "      <td>(0.134, 0.287]</td>\n",
       "      <td>(5400.0, 8250.0]</td>\n",
       "      <td>(6.0, 9.0]</td>\n",
       "      <td>(1.0, 2.0]</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>150000 rows × 21 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        SeriousDlqin2yrs  RevolvingUtilizationOfUnsecuredLines  age  NumberOfTime30-59DaysPastDueNotWorse    DebtRatio  MonthlyIncome  NumberOfOpenCreditLinesAndLoans  NumberOfTimes90DaysLate  NumberRealEstateLoansOrLines  NumberOfTime60-89DaysPastDueNotWorse  NumberOfDependents       bin_age bin_NumberOfDependents bin_NumberOfTime30-59DaysPastDueNotWorse bin_NumberOfTime60-89DaysPastDueNotWorse bin_NumberOfTimes90DaysLate bin_RevolvingUtilizationOfUnsecuredLines    bin_DebtRatio    bin_MonthlyIncome bin_NumberOfOpenCreditLinesAndLoans bin_NumberRealEstateLoansOrLines\n",
       "1                      1                              0.766127   45                                     2     0.802982         9120.0                               13                        0                             6                                     0                 2.0  (40.0, 50.0]            (-inf, 2.0]                               (1.0, 2.0]                              (-inf, 1.0]                 (-inf, 1.0]                         (0.699, 50708.0]     (0.468, 4.0]  (8250.0, 3008750.0]                        (12.0, 58.0]                       (3.0, inf]\n",
       "2                      0                              0.957151   40                                     0     0.121876         2600.0                                4                        0                             0                                     0                 1.0  (25.0, 40.0]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                         (0.699, 50708.0]  (-0.001, 0.134]     (-0.001, 3400.0]                       (-0.001, 4.0]                      (-inf, 0.0]\n",
       "3                      0                              0.658180   38                                     1     0.085113         3042.0                                2                        1                             0                                     0                 0.0  (25.0, 40.0]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                           (0.271, 0.699]  (-0.001, 0.134]     (-0.001, 3400.0]                       (-0.001, 4.0]                      (-inf, 0.0]\n",
       "4                      0                              0.233810   30                                     0     0.036050         3300.0                                5                        0                             0                                     0                 0.0  (25.0, 40.0]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                          (0.0832, 0.271]  (-0.001, 0.134]     (-0.001, 3400.0]                          (4.0, 6.0]                      (-inf, 0.0]\n",
       "5                      0                              0.907239   49                                     1     0.024926        63588.0                                7                        0                             1                                     0                 0.0  (40.0, 50.0]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                         (0.699, 50708.0]  (-0.001, 0.134]  (8250.0, 3008750.0]                          (6.0, 9.0]                       (0.0, 1.0]\n",
       "...                  ...                                   ...  ...                                   ...          ...            ...                              ...                      ...                           ...                                   ...                 ...           ...                    ...                                      ...                                      ...                         ...                                      ...              ...                  ...                                 ...                              ...\n",
       "149996                 0                              0.040674   74                                     0     0.225131         2100.0                                4                        0                             1                                     0                 0.0   (70.0, inf]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                         (0.0192, 0.0832]   (0.134, 0.287]     (-0.001, 3400.0]                       (-0.001, 4.0]                       (0.0, 1.0]\n",
       "149997                 0                              0.299745   44                                     0     0.716562         5584.0                                4                        0                             1                                     0                 2.0  (40.0, 50.0]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                           (0.271, 0.699]     (0.468, 4.0]     (5400.0, 8250.0]                       (-0.001, 4.0]                       (0.0, 1.0]\n",
       "149998                 0                              0.246044   58                                     0  3870.000000         5400.0                               18                        0                             1                                     0                 0.0  (50.0, 60.0]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                          (0.0832, 0.271]  (4.0, 329664.0]     (3400.0, 5400.0]                        (12.0, 58.0]                       (0.0, 1.0]\n",
       "149999                 0                              0.000000   30                                     0     0.000000         5716.0                                4                        0                             0                                     0                 0.0  (25.0, 40.0]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                         (-0.001, 0.0192]  (-0.001, 0.134]     (5400.0, 8250.0]                       (-0.001, 4.0]                      (-inf, 0.0]\n",
       "150000                 0                              0.850283   64                                     0     0.249908         8158.0                                8                        0                             2                                     0                 0.0  (60.0, 70.0]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                         (0.699, 50708.0]   (0.134, 0.287]     (5400.0, 8250.0]                          (6.0, 9.0]                       (1.0, 2.0]\n",
       "\n",
       "[150000 rows x 21 columns]"
      ]
     },
     "execution_count": 58,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_new = df_train.copy()\n",
    "df_new"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['bin_age',\n",
       " 'bin_NumberOfDependents',\n",
       " 'bin_NumberOfTime30-59DaysPastDueNotWorse',\n",
       " 'bin_NumberOfTime60-89DaysPastDueNotWorse',\n",
       " 'bin_NumberOfTimes90DaysLate',\n",
       " 'bin_RevolvingUtilizationOfUnsecuredLines',\n",
       " 'bin_DebtRatio',\n",
       " 'bin_MonthlyIncome',\n",
       " 'bin_NumberOfOpenCreditLinesAndLoans',\n",
       " 'bin_NumberRealEstateLoansOrLines']"
      ]
     },
     "execution_count": 59,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "bin_cols"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th colspan=\"2\" halign=\"left\">SeriousDlqin2yrs</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>sum</th>\n",
       "      <th>count</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>bin_age</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>(-inf, 25.0]</th>\n",
       "      <td>338</td>\n",
       "      <td>3028</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>(25.0, 40.0]</th>\n",
       "      <td>3296</td>\n",
       "      <td>32069</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>(40.0, 50.0]</th>\n",
       "      <td>2893</td>\n",
       "      <td>35037</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>(50.0, 60.0]</th>\n",
       "      <td>2149</td>\n",
       "      <td>34806</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>(60.0, 70.0]</th>\n",
       "      <td>952</td>\n",
       "      <td>27424</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>(70.0, inf]</th>\n",
       "      <td>398</td>\n",
       "      <td>17636</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             SeriousDlqin2yrs       \n",
       "                          sum  count\n",
       "bin_age                             \n",
       "(-inf, 25.0]              338   3028\n",
       "(25.0, 40.0]             3296  32069\n",
       "(40.0, 50.0]             2893  35037\n",
       "(50.0, 60.0]             2149  34806\n",
       "(60.0, 70.0]              952  27424\n",
       "(70.0, inf]               398  17636"
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#下面这几步是下面那几个大函数的分步走\n",
    "df_woe=df_new.groupby('bin_age').agg({'SeriousDlqin2yrs':['sum', 'count']})\n",
    "df_woe#这里是分组聚合 sum表示那一组情况出现的个数 比如(-inf, 25.0]这一组出现了3208次 但是它是一个分组  在这个组的人的个数是3028   \n",
    "#count的和是15000    #因为这里是按照 SeriousDlqin2yrs来聚合的  所以sum的数量就是label=1的数量  就是要违约了  即bad的数量"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[('SeriousDlqin2yrs', 'sum') ('SeriousDlqin2yrs', 'count')] <class 'numpy.ndarray'>\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SeriousDlqin2yrssum</th>\n",
       "      <th>SeriousDlqin2yrscount</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>bin_age</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>(-inf, 25.0]</th>\n",
       "      <td>338</td>\n",
       "      <td>3028</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>(25.0, 40.0]</th>\n",
       "      <td>3296</td>\n",
       "      <td>32069</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>(40.0, 50.0]</th>\n",
       "      <td>2893</td>\n",
       "      <td>35037</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>(50.0, 60.0]</th>\n",
       "      <td>2149</td>\n",
       "      <td>34806</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>(60.0, 70.0]</th>\n",
       "      <td>952</td>\n",
       "      <td>27424</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>(70.0, inf]</th>\n",
       "      <td>398</td>\n",
       "      <td>17636</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              SeriousDlqin2yrssum  SeriousDlqin2yrscount\n",
       "bin_age                                                 \n",
       "(-inf, 25.0]                  338                   3028\n",
       "(25.0, 40.0]                 3296                  32069\n",
       "(40.0, 50.0]                 2893                  35037\n",
       "(50.0, 60.0]                 2149                  34806\n",
       "(60.0, 70.0]                  952                  27424\n",
       "(70.0, inf]                   398                  17636"
      ]
     },
     "execution_count": 67,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print(df_woe.columns.values,type(df_woe.columns.values))\n",
    "df_woe.columns=list(map(''.join, df_woe.columns.values))#map函数的作用是把 括号内部前面的function作用在后面中的序列中的每一个元素上\n",
    "df_woe#这里是把里面的两个字段起一个连接作用"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>bin_age</th>\n",
       "      <th>SeriousDlqin2yrssum</th>\n",
       "      <th>SeriousDlqin2yrscount</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>(-inf, 25.0]</td>\n",
       "      <td>338</td>\n",
       "      <td>3028</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>(25.0, 40.0]</td>\n",
       "      <td>3296</td>\n",
       "      <td>32069</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>(40.0, 50.0]</td>\n",
       "      <td>2893</td>\n",
       "      <td>35037</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>(50.0, 60.0]</td>\n",
       "      <td>2149</td>\n",
       "      <td>34806</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>(60.0, 70.0]</td>\n",
       "      <td>952</td>\n",
       "      <td>27424</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>(70.0, inf]</td>\n",
       "      <td>398</td>\n",
       "      <td>17636</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        bin_age  SeriousDlqin2yrssum  SeriousDlqin2yrscount\n",
       "0  (-inf, 25.0]                  338                   3028\n",
       "1  (25.0, 40.0]                 3296                  32069\n",
       "2  (40.0, 50.0]                 2893                  35037\n",
       "3  (50.0, 60.0]                 2149                  34806\n",
       "4  (60.0, 70.0]                  952                  27424\n",
       "5   (70.0, inf]                  398                  17636"
      ]
     },
     "execution_count": 68,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_woe = df_woe.reset_index()#重置一下索引\n",
    "df_woe"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>bin_age</th>\n",
       "      <th>bad</th>\n",
       "      <th>all</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>(-inf, 25.0]</td>\n",
       "      <td>338</td>\n",
       "      <td>3028</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>(25.0, 40.0]</td>\n",
       "      <td>3296</td>\n",
       "      <td>32069</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>(40.0, 50.0]</td>\n",
       "      <td>2893</td>\n",
       "      <td>35037</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>(50.0, 60.0]</td>\n",
       "      <td>2149</td>\n",
       "      <td>34806</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>(60.0, 70.0]</td>\n",
       "      <td>952</td>\n",
       "      <td>27424</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>(70.0, inf]</td>\n",
       "      <td>398</td>\n",
       "      <td>17636</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        bin_age   bad    all\n",
       "0  (-inf, 25.0]   338   3028\n",
       "1  (25.0, 40.0]  3296  32069\n",
       "2  (40.0, 50.0]  2893  35037\n",
       "3  (50.0, 60.0]  2149  34806\n",
       "4  (60.0, 70.0]   952  27424\n",
       "5   (70.0, inf]   398  17636"
      ]
     },
     "execution_count": 71,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_woe = df_woe.rename(columns={'SeriousDlqin2yrs'+'sum':'bad', 'SeriousDlqin2yrs'+'count':'all'})#更改索引名称\n",
    "df_woe#这里实际上就是bad和all 有了这些东西之后 就可以去计算WOE编码了"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SeriousDlqin2yrs</th>\n",
       "      <th>RevolvingUtilizationOfUnsecuredLines</th>\n",
       "      <th>age</th>\n",
       "      <th>NumberOfTime30-59DaysPastDueNotWorse</th>\n",
       "      <th>DebtRatio</th>\n",
       "      <th>MonthlyIncome</th>\n",
       "      <th>NumberOfOpenCreditLinesAndLoans</th>\n",
       "      <th>NumberOfTimes90DaysLate</th>\n",
       "      <th>NumberRealEstateLoansOrLines</th>\n",
       "      <th>NumberOfTime60-89DaysPastDueNotWorse</th>\n",
       "      <th>NumberOfDependents</th>\n",
       "      <th>bin_age</th>\n",
       "      <th>bin_NumberOfDependents</th>\n",
       "      <th>bin_NumberOfTime30-59DaysPastDueNotWorse</th>\n",
       "      <th>bin_NumberOfTime60-89DaysPastDueNotWorse</th>\n",
       "      <th>bin_NumberOfTimes90DaysLate</th>\n",
       "      <th>bin_RevolvingUtilizationOfUnsecuredLines</th>\n",
       "      <th>bin_DebtRatio</th>\n",
       "      <th>bin_MonthlyIncome</th>\n",
       "      <th>bin_NumberOfOpenCreditLinesAndLoans</th>\n",
       "      <th>bin_NumberRealEstateLoansOrLines</th>\n",
       "      <th>bad_bin_age</th>\n",
       "      <th>all_bin_age</th>\n",
       "      <th>good_bin_age</th>\n",
       "      <th>Margin Bad_bin_age</th>\n",
       "      <th>Margin Good_bin_age</th>\n",
       "      <th>woe_bin_age</th>\n",
       "      <th>bad_bin_NumberOfDependents</th>\n",
       "      <th>all_bin_NumberOfDependents</th>\n",
       "      <th>good_bin_NumberOfDependents</th>\n",
       "      <th>Margin Bad_bin_NumberOfDependents</th>\n",
       "      <th>Margin Good_bin_NumberOfDependents</th>\n",
       "      <th>woe_bin_NumberOfDependents</th>\n",
       "      <th>bad_bin_NumberOfTime30-59DaysPastDueNotWorse</th>\n",
       "      <th>all_bin_NumberOfTime30-59DaysPastDueNotWorse</th>\n",
       "      <th>good_bin_NumberOfTime30-59DaysPastDueNotWorse</th>\n",
       "      <th>Margin Bad_bin_NumberOfTime30-59DaysPastDueNotWorse</th>\n",
       "      <th>Margin Good_bin_NumberOfTime30-59DaysPastDueNotWorse</th>\n",
       "      <th>woe_bin_NumberOfTime30-59DaysPastDueNotWorse</th>\n",
       "      <th>bad_bin_NumberOfTime60-89DaysPastDueNotWorse</th>\n",
       "      <th>all_bin_NumberOfTime60-89DaysPastDueNotWorse</th>\n",
       "      <th>good_bin_NumberOfTime60-89DaysPastDueNotWorse</th>\n",
       "      <th>Margin Bad_bin_NumberOfTime60-89DaysPastDueNotWorse</th>\n",
       "      <th>Margin Good_bin_NumberOfTime60-89DaysPastDueNotWorse</th>\n",
       "      <th>woe_bin_NumberOfTime60-89DaysPastDueNotWorse</th>\n",
       "      <th>bad_bin_NumberOfTimes90DaysLate</th>\n",
       "      <th>all_bin_NumberOfTimes90DaysLate</th>\n",
       "      <th>good_bin_NumberOfTimes90DaysLate</th>\n",
       "      <th>Margin Bad_bin_NumberOfTimes90DaysLate</th>\n",
       "      <th>Margin Good_bin_NumberOfTimes90DaysLate</th>\n",
       "      <th>woe_bin_NumberOfTimes90DaysLate</th>\n",
       "      <th>bad_bin_RevolvingUtilizationOfUnsecuredLines</th>\n",
       "      <th>all_bin_RevolvingUtilizationOfUnsecuredLines</th>\n",
       "      <th>good_bin_RevolvingUtilizationOfUnsecuredLines</th>\n",
       "      <th>Margin Bad_bin_RevolvingUtilizationOfUnsecuredLines</th>\n",
       "      <th>Margin Good_bin_RevolvingUtilizationOfUnsecuredLines</th>\n",
       "      <th>woe_bin_RevolvingUtilizationOfUnsecuredLines</th>\n",
       "      <th>bad_bin_DebtRatio</th>\n",
       "      <th>all_bin_DebtRatio</th>\n",
       "      <th>good_bin_DebtRatio</th>\n",
       "      <th>Margin Bad_bin_DebtRatio</th>\n",
       "      <th>Margin Good_bin_DebtRatio</th>\n",
       "      <th>woe_bin_DebtRatio</th>\n",
       "      <th>bad_bin_MonthlyIncome</th>\n",
       "      <th>all_bin_MonthlyIncome</th>\n",
       "      <th>good_bin_MonthlyIncome</th>\n",
       "      <th>Margin Bad_bin_MonthlyIncome</th>\n",
       "      <th>Margin Good_bin_MonthlyIncome</th>\n",
       "      <th>woe_bin_MonthlyIncome</th>\n",
       "      <th>bad_bin_NumberOfOpenCreditLinesAndLoans</th>\n",
       "      <th>all_bin_NumberOfOpenCreditLinesAndLoans</th>\n",
       "      <th>good_bin_NumberOfOpenCreditLinesAndLoans</th>\n",
       "      <th>Margin Bad_bin_NumberOfOpenCreditLinesAndLoans</th>\n",
       "      <th>Margin Good_bin_NumberOfOpenCreditLinesAndLoans</th>\n",
       "      <th>woe_bin_NumberOfOpenCreditLinesAndLoans</th>\n",
       "      <th>bad_bin_NumberRealEstateLoansOrLines</th>\n",
       "      <th>all_bin_NumberRealEstateLoansOrLines</th>\n",
       "      <th>good_bin_NumberRealEstateLoansOrLines</th>\n",
       "      <th>Margin Bad_bin_NumberRealEstateLoansOrLines</th>\n",
       "      <th>Margin Good_bin_NumberRealEstateLoansOrLines</th>\n",
       "      <th>woe_bin_NumberRealEstateLoansOrLines</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>0.766127</td>\n",
       "      <td>45</td>\n",
       "      <td>2</td>\n",
       "      <td>0.802982</td>\n",
       "      <td>9120.0</td>\n",
       "      <td>13</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>(40.0, 50.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(1.0, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.699, 50708.0]</td>\n",
       "      <td>(0.468, 4.0]</td>\n",
       "      <td>(8250.0, 3008750.0]</td>\n",
       "      <td>(12.0, 58.0]</td>\n",
       "      <td>(3.0, inf]</td>\n",
       "      <td>2893</td>\n",
       "      <td>35037</td>\n",
       "      <td>32144</td>\n",
       "      <td>0.288550</td>\n",
       "      <td>0.229643</td>\n",
       "      <td>0.813822</td>\n",
       "      <td>8793</td>\n",
       "      <td>136664</td>\n",
       "      <td>127871</td>\n",
       "      <td>0.87702</td>\n",
       "      <td>0.913534</td>\n",
       "      <td>0.67296</td>\n",
       "      <td>1219</td>\n",
       "      <td>4598</td>\n",
       "      <td>3379</td>\n",
       "      <td>0.121584</td>\n",
       "      <td>0.024140</td>\n",
       "      <td>1.797837</td>\n",
       "      <td>9033</td>\n",
       "      <td>148127</td>\n",
       "      <td>139094</td>\n",
       "      <td>0.900958</td>\n",
       "      <td>0.993713</td>\n",
       "      <td>0.645352</td>\n",
       "      <td>8319</td>\n",
       "      <td>146905</td>\n",
       "      <td>138586</td>\n",
       "      <td>0.829743</td>\n",
       "      <td>0.990084</td>\n",
       "      <td>0.608707</td>\n",
       "      <td>5963</td>\n",
       "      <td>30000</td>\n",
       "      <td>24037</td>\n",
       "      <td>0.594754</td>\n",
       "      <td>0.171725</td>\n",
       "      <td>1.495914</td>\n",
       "      <td>2974</td>\n",
       "      <td>30109</td>\n",
       "      <td>27135</td>\n",
       "      <td>0.296629</td>\n",
       "      <td>0.193857</td>\n",
       "      <td>0.928274</td>\n",
       "      <td>1387</td>\n",
       "      <td>29993</td>\n",
       "      <td>28606</td>\n",
       "      <td>0.138340</td>\n",
       "      <td>0.204367</td>\n",
       "      <td>0.516960</td>\n",
       "      <td>1846</td>\n",
       "      <td>27684</td>\n",
       "      <td>25838</td>\n",
       "      <td>0.184121</td>\n",
       "      <td>0.184591</td>\n",
       "      <td>0.691873</td>\n",
       "      <td>419</td>\n",
       "      <td>3652</td>\n",
       "      <td>3233</td>\n",
       "      <td>0.041791</td>\n",
       "      <td>0.023097</td>\n",
       "      <td>1.032961</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>0.957151</td>\n",
       "      <td>40</td>\n",
       "      <td>0</td>\n",
       "      <td>0.121876</td>\n",
       "      <td>2600.0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>(25.0, 40.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.699, 50708.0]</td>\n",
       "      <td>(-0.001, 0.134]</td>\n",
       "      <td>(-0.001, 3400.0]</td>\n",
       "      <td>(-0.001, 4.0]</td>\n",
       "      <td>(-inf, 0.0]</td>\n",
       "      <td>3296</td>\n",
       "      <td>32069</td>\n",
       "      <td>28773</td>\n",
       "      <td>0.328745</td>\n",
       "      <td>0.205560</td>\n",
       "      <td>0.955231</td>\n",
       "      <td>8793</td>\n",
       "      <td>136664</td>\n",
       "      <td>127871</td>\n",
       "      <td>0.87702</td>\n",
       "      <td>0.913534</td>\n",
       "      <td>0.67296</td>\n",
       "      <td>7450</td>\n",
       "      <td>142051</td>\n",
       "      <td>134601</td>\n",
       "      <td>0.743068</td>\n",
       "      <td>0.961614</td>\n",
       "      <td>0.572521</td>\n",
       "      <td>9033</td>\n",
       "      <td>148127</td>\n",
       "      <td>139094</td>\n",
       "      <td>0.900958</td>\n",
       "      <td>0.993713</td>\n",
       "      <td>0.645352</td>\n",
       "      <td>8319</td>\n",
       "      <td>146905</td>\n",
       "      <td>138586</td>\n",
       "      <td>0.829743</td>\n",
       "      <td>0.990084</td>\n",
       "      <td>0.608707</td>\n",
       "      <td>5963</td>\n",
       "      <td>30000</td>\n",
       "      <td>24037</td>\n",
       "      <td>0.594754</td>\n",
       "      <td>0.171725</td>\n",
       "      <td>1.495914</td>\n",
       "      <td>1830</td>\n",
       "      <td>30000</td>\n",
       "      <td>28170</td>\n",
       "      <td>0.182525</td>\n",
       "      <td>0.201252</td>\n",
       "      <td>0.645506</td>\n",
       "      <td>2789</td>\n",
       "      <td>30289</td>\n",
       "      <td>27500</td>\n",
       "      <td>0.278177</td>\n",
       "      <td>0.196465</td>\n",
       "      <td>0.882076</td>\n",
       "      <td>3103</td>\n",
       "      <td>33659</td>\n",
       "      <td>30556</td>\n",
       "      <td>0.309495</td>\n",
       "      <td>0.218298</td>\n",
       "      <td>0.882845</td>\n",
       "      <td>4672</td>\n",
       "      <td>56188</td>\n",
       "      <td>51516</td>\n",
       "      <td>0.465988</td>\n",
       "      <td>0.368040</td>\n",
       "      <td>0.818076</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>0.658180</td>\n",
       "      <td>38</td>\n",
       "      <td>1</td>\n",
       "      <td>0.085113</td>\n",
       "      <td>3042.0</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>(25.0, 40.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.271, 0.699]</td>\n",
       "      <td>(-0.001, 0.134]</td>\n",
       "      <td>(-0.001, 3400.0]</td>\n",
       "      <td>(-0.001, 4.0]</td>\n",
       "      <td>(-inf, 0.0]</td>\n",
       "      <td>3296</td>\n",
       "      <td>32069</td>\n",
       "      <td>28773</td>\n",
       "      <td>0.328745</td>\n",
       "      <td>0.205560</td>\n",
       "      <td>0.955231</td>\n",
       "      <td>8793</td>\n",
       "      <td>136664</td>\n",
       "      <td>127871</td>\n",
       "      <td>0.87702</td>\n",
       "      <td>0.913534</td>\n",
       "      <td>0.67296</td>\n",
       "      <td>7450</td>\n",
       "      <td>142051</td>\n",
       "      <td>134601</td>\n",
       "      <td>0.743068</td>\n",
       "      <td>0.961614</td>\n",
       "      <td>0.572521</td>\n",
       "      <td>9033</td>\n",
       "      <td>148127</td>\n",
       "      <td>139094</td>\n",
       "      <td>0.900958</td>\n",
       "      <td>0.993713</td>\n",
       "      <td>0.645352</td>\n",
       "      <td>8319</td>\n",
       "      <td>146905</td>\n",
       "      <td>138586</td>\n",
       "      <td>0.829743</td>\n",
       "      <td>0.990084</td>\n",
       "      <td>0.608707</td>\n",
       "      <td>2107</td>\n",
       "      <td>30000</td>\n",
       "      <td>27893</td>\n",
       "      <td>0.210154</td>\n",
       "      <td>0.199273</td>\n",
       "      <td>0.720083</td>\n",
       "      <td>1830</td>\n",
       "      <td>30000</td>\n",
       "      <td>28170</td>\n",
       "      <td>0.182525</td>\n",
       "      <td>0.201252</td>\n",
       "      <td>0.645506</td>\n",
       "      <td>2789</td>\n",
       "      <td>30289</td>\n",
       "      <td>27500</td>\n",
       "      <td>0.278177</td>\n",
       "      <td>0.196465</td>\n",
       "      <td>0.882076</td>\n",
       "      <td>3103</td>\n",
       "      <td>33659</td>\n",
       "      <td>30556</td>\n",
       "      <td>0.309495</td>\n",
       "      <td>0.218298</td>\n",
       "      <td>0.882845</td>\n",
       "      <td>4672</td>\n",
       "      <td>56188</td>\n",
       "      <td>51516</td>\n",
       "      <td>0.465988</td>\n",
       "      <td>0.368040</td>\n",
       "      <td>0.818076</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>0.233810</td>\n",
       "      <td>30</td>\n",
       "      <td>0</td>\n",
       "      <td>0.036050</td>\n",
       "      <td>3300.0</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>(25.0, 40.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.0832, 0.271]</td>\n",
       "      <td>(-0.001, 0.134]</td>\n",
       "      <td>(-0.001, 3400.0]</td>\n",
       "      <td>(4.0, 6.0]</td>\n",
       "      <td>(-inf, 0.0]</td>\n",
       "      <td>3296</td>\n",
       "      <td>32069</td>\n",
       "      <td>28773</td>\n",
       "      <td>0.328745</td>\n",
       "      <td>0.205560</td>\n",
       "      <td>0.955231</td>\n",
       "      <td>8793</td>\n",
       "      <td>136664</td>\n",
       "      <td>127871</td>\n",
       "      <td>0.87702</td>\n",
       "      <td>0.913534</td>\n",
       "      <td>0.67296</td>\n",
       "      <td>7450</td>\n",
       "      <td>142051</td>\n",
       "      <td>134601</td>\n",
       "      <td>0.743068</td>\n",
       "      <td>0.961614</td>\n",
       "      <td>0.572521</td>\n",
       "      <td>9033</td>\n",
       "      <td>148127</td>\n",
       "      <td>139094</td>\n",
       "      <td>0.900958</td>\n",
       "      <td>0.993713</td>\n",
       "      <td>0.645352</td>\n",
       "      <td>8319</td>\n",
       "      <td>146905</td>\n",
       "      <td>138586</td>\n",
       "      <td>0.829743</td>\n",
       "      <td>0.990084</td>\n",
       "      <td>0.608707</td>\n",
       "      <td>877</td>\n",
       "      <td>30000</td>\n",
       "      <td>29123</td>\n",
       "      <td>0.087473</td>\n",
       "      <td>0.208060</td>\n",
       "      <td>0.350952</td>\n",
       "      <td>1830</td>\n",
       "      <td>30000</td>\n",
       "      <td>28170</td>\n",
       "      <td>0.182525</td>\n",
       "      <td>0.201252</td>\n",
       "      <td>0.645506</td>\n",
       "      <td>2789</td>\n",
       "      <td>30289</td>\n",
       "      <td>27500</td>\n",
       "      <td>0.278177</td>\n",
       "      <td>0.196465</td>\n",
       "      <td>0.882076</td>\n",
       "      <td>1573</td>\n",
       "      <td>26545</td>\n",
       "      <td>24972</td>\n",
       "      <td>0.156892</td>\n",
       "      <td>0.178405</td>\n",
       "      <td>0.630962</td>\n",
       "      <td>4672</td>\n",
       "      <td>56188</td>\n",
       "      <td>51516</td>\n",
       "      <td>0.465988</td>\n",
       "      <td>0.368040</td>\n",
       "      <td>0.818076</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>0.907239</td>\n",
       "      <td>49</td>\n",
       "      <td>1</td>\n",
       "      <td>0.024926</td>\n",
       "      <td>63588.0</td>\n",
       "      <td>7</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>(40.0, 50.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.699, 50708.0]</td>\n",
       "      <td>(-0.001, 0.134]</td>\n",
       "      <td>(8250.0, 3008750.0]</td>\n",
       "      <td>(6.0, 9.0]</td>\n",
       "      <td>(0.0, 1.0]</td>\n",
       "      <td>2893</td>\n",
       "      <td>35037</td>\n",
       "      <td>32144</td>\n",
       "      <td>0.288550</td>\n",
       "      <td>0.229643</td>\n",
       "      <td>0.813822</td>\n",
       "      <td>8793</td>\n",
       "      <td>136664</td>\n",
       "      <td>127871</td>\n",
       "      <td>0.87702</td>\n",
       "      <td>0.913534</td>\n",
       "      <td>0.67296</td>\n",
       "      <td>7450</td>\n",
       "      <td>142051</td>\n",
       "      <td>134601</td>\n",
       "      <td>0.743068</td>\n",
       "      <td>0.961614</td>\n",
       "      <td>0.572521</td>\n",
       "      <td>9033</td>\n",
       "      <td>148127</td>\n",
       "      <td>139094</td>\n",
       "      <td>0.900958</td>\n",
       "      <td>0.993713</td>\n",
       "      <td>0.645352</td>\n",
       "      <td>8319</td>\n",
       "      <td>146905</td>\n",
       "      <td>138586</td>\n",
       "      <td>0.829743</td>\n",
       "      <td>0.990084</td>\n",
       "      <td>0.608707</td>\n",
       "      <td>5963</td>\n",
       "      <td>30000</td>\n",
       "      <td>24037</td>\n",
       "      <td>0.594754</td>\n",
       "      <td>0.171725</td>\n",
       "      <td>1.495914</td>\n",
       "      <td>1830</td>\n",
       "      <td>30000</td>\n",
       "      <td>28170</td>\n",
       "      <td>0.182525</td>\n",
       "      <td>0.201252</td>\n",
       "      <td>0.645506</td>\n",
       "      <td>1387</td>\n",
       "      <td>29993</td>\n",
       "      <td>28606</td>\n",
       "      <td>0.138340</td>\n",
       "      <td>0.204367</td>\n",
       "      <td>0.516960</td>\n",
       "      <td>2017</td>\n",
       "      <td>37162</td>\n",
       "      <td>35145</td>\n",
       "      <td>0.201177</td>\n",
       "      <td>0.251082</td>\n",
       "      <td>0.588475</td>\n",
       "      <td>2748</td>\n",
       "      <td>52338</td>\n",
       "      <td>49590</td>\n",
       "      <td>0.274087</td>\n",
       "      <td>0.354280</td>\n",
       "      <td>0.573037</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149995</th>\n",
       "      <td>0</td>\n",
       "      <td>0.040674</td>\n",
       "      <td>74</td>\n",
       "      <td>0</td>\n",
       "      <td>0.225131</td>\n",
       "      <td>2100.0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>(70.0, inf]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.0192, 0.0832]</td>\n",
       "      <td>(0.134, 0.287]</td>\n",
       "      <td>(-0.001, 3400.0]</td>\n",
       "      <td>(-0.001, 4.0]</td>\n",
       "      <td>(0.0, 1.0]</td>\n",
       "      <td>398</td>\n",
       "      <td>17636</td>\n",
       "      <td>17238</td>\n",
       "      <td>0.039697</td>\n",
       "      <td>0.123151</td>\n",
       "      <td>0.279404</td>\n",
       "      <td>8793</td>\n",
       "      <td>136664</td>\n",
       "      <td>127871</td>\n",
       "      <td>0.87702</td>\n",
       "      <td>0.913534</td>\n",
       "      <td>0.67296</td>\n",
       "      <td>7450</td>\n",
       "      <td>142051</td>\n",
       "      <td>134601</td>\n",
       "      <td>0.743068</td>\n",
       "      <td>0.961614</td>\n",
       "      <td>0.572521</td>\n",
       "      <td>9033</td>\n",
       "      <td>148127</td>\n",
       "      <td>139094</td>\n",
       "      <td>0.900958</td>\n",
       "      <td>0.993713</td>\n",
       "      <td>0.645352</td>\n",
       "      <td>8319</td>\n",
       "      <td>146905</td>\n",
       "      <td>138586</td>\n",
       "      <td>0.829743</td>\n",
       "      <td>0.990084</td>\n",
       "      <td>0.608707</td>\n",
       "      <td>497</td>\n",
       "      <td>30000</td>\n",
       "      <td>29503</td>\n",
       "      <td>0.049571</td>\n",
       "      <td>0.210775</td>\n",
       "      <td>0.211221</td>\n",
       "      <td>1716</td>\n",
       "      <td>30000</td>\n",
       "      <td>28284</td>\n",
       "      <td>0.171155</td>\n",
       "      <td>0.202066</td>\n",
       "      <td>0.613576</td>\n",
       "      <td>2789</td>\n",
       "      <td>30289</td>\n",
       "      <td>27500</td>\n",
       "      <td>0.278177</td>\n",
       "      <td>0.196465</td>\n",
       "      <td>0.882076</td>\n",
       "      <td>3103</td>\n",
       "      <td>33659</td>\n",
       "      <td>30556</td>\n",
       "      <td>0.309495</td>\n",
       "      <td>0.218298</td>\n",
       "      <td>0.882845</td>\n",
       "      <td>2748</td>\n",
       "      <td>52338</td>\n",
       "      <td>49590</td>\n",
       "      <td>0.274087</td>\n",
       "      <td>0.354280</td>\n",
       "      <td>0.573037</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149996</th>\n",
       "      <td>0</td>\n",
       "      <td>0.299745</td>\n",
       "      <td>44</td>\n",
       "      <td>0</td>\n",
       "      <td>0.716562</td>\n",
       "      <td>5584.0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>(40.0, 50.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.271, 0.699]</td>\n",
       "      <td>(0.468, 4.0]</td>\n",
       "      <td>(5400.0, 8250.0]</td>\n",
       "      <td>(-0.001, 4.0]</td>\n",
       "      <td>(0.0, 1.0]</td>\n",
       "      <td>2893</td>\n",
       "      <td>35037</td>\n",
       "      <td>32144</td>\n",
       "      <td>0.288550</td>\n",
       "      <td>0.229643</td>\n",
       "      <td>0.813822</td>\n",
       "      <td>8793</td>\n",
       "      <td>136664</td>\n",
       "      <td>127871</td>\n",
       "      <td>0.87702</td>\n",
       "      <td>0.913534</td>\n",
       "      <td>0.67296</td>\n",
       "      <td>7450</td>\n",
       "      <td>142051</td>\n",
       "      <td>134601</td>\n",
       "      <td>0.743068</td>\n",
       "      <td>0.961614</td>\n",
       "      <td>0.572521</td>\n",
       "      <td>9033</td>\n",
       "      <td>148127</td>\n",
       "      <td>139094</td>\n",
       "      <td>0.900958</td>\n",
       "      <td>0.993713</td>\n",
       "      <td>0.645352</td>\n",
       "      <td>8319</td>\n",
       "      <td>146905</td>\n",
       "      <td>138586</td>\n",
       "      <td>0.829743</td>\n",
       "      <td>0.990084</td>\n",
       "      <td>0.608707</td>\n",
       "      <td>2107</td>\n",
       "      <td>30000</td>\n",
       "      <td>27893</td>\n",
       "      <td>0.210154</td>\n",
       "      <td>0.199273</td>\n",
       "      <td>0.720083</td>\n",
       "      <td>2974</td>\n",
       "      <td>30109</td>\n",
       "      <td>27135</td>\n",
       "      <td>0.296629</td>\n",
       "      <td>0.193857</td>\n",
       "      <td>0.928274</td>\n",
       "      <td>1819</td>\n",
       "      <td>29961</td>\n",
       "      <td>28142</td>\n",
       "      <td>0.181428</td>\n",
       "      <td>0.201052</td>\n",
       "      <td>0.643114</td>\n",
       "      <td>3103</td>\n",
       "      <td>33659</td>\n",
       "      <td>30556</td>\n",
       "      <td>0.309495</td>\n",
       "      <td>0.218298</td>\n",
       "      <td>0.882845</td>\n",
       "      <td>2748</td>\n",
       "      <td>52338</td>\n",
       "      <td>49590</td>\n",
       "      <td>0.274087</td>\n",
       "      <td>0.354280</td>\n",
       "      <td>0.573037</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149997</th>\n",
       "      <td>0</td>\n",
       "      <td>0.246044</td>\n",
       "      <td>58</td>\n",
       "      <td>0</td>\n",
       "      <td>3870.000000</td>\n",
       "      <td>5400.0</td>\n",
       "      <td>18</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>(50.0, 60.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.0832, 0.271]</td>\n",
       "      <td>(4.0, 329664.0]</td>\n",
       "      <td>(3400.0, 5400.0]</td>\n",
       "      <td>(12.0, 58.0]</td>\n",
       "      <td>(0.0, 1.0]</td>\n",
       "      <td>2149</td>\n",
       "      <td>34806</td>\n",
       "      <td>32657</td>\n",
       "      <td>0.214343</td>\n",
       "      <td>0.233308</td>\n",
       "      <td>0.651655</td>\n",
       "      <td>8793</td>\n",
       "      <td>136664</td>\n",
       "      <td>127871</td>\n",
       "      <td>0.87702</td>\n",
       "      <td>0.913534</td>\n",
       "      <td>0.67296</td>\n",
       "      <td>7450</td>\n",
       "      <td>142051</td>\n",
       "      <td>134601</td>\n",
       "      <td>0.743068</td>\n",
       "      <td>0.961614</td>\n",
       "      <td>0.572521</td>\n",
       "      <td>9033</td>\n",
       "      <td>148127</td>\n",
       "      <td>139094</td>\n",
       "      <td>0.900958</td>\n",
       "      <td>0.993713</td>\n",
       "      <td>0.645352</td>\n",
       "      <td>8319</td>\n",
       "      <td>146905</td>\n",
       "      <td>138586</td>\n",
       "      <td>0.829743</td>\n",
       "      <td>0.990084</td>\n",
       "      <td>0.608707</td>\n",
       "      <td>877</td>\n",
       "      <td>30000</td>\n",
       "      <td>29123</td>\n",
       "      <td>0.087473</td>\n",
       "      <td>0.208060</td>\n",
       "      <td>0.350952</td>\n",
       "      <td>1653</td>\n",
       "      <td>29891</td>\n",
       "      <td>28238</td>\n",
       "      <td>0.164871</td>\n",
       "      <td>0.201737</td>\n",
       "      <td>0.597328</td>\n",
       "      <td>4031</td>\n",
       "      <td>59757</td>\n",
       "      <td>55726</td>\n",
       "      <td>0.402055</td>\n",
       "      <td>0.398117</td>\n",
       "      <td>0.698081</td>\n",
       "      <td>1846</td>\n",
       "      <td>27684</td>\n",
       "      <td>25838</td>\n",
       "      <td>0.184121</td>\n",
       "      <td>0.184591</td>\n",
       "      <td>0.691873</td>\n",
       "      <td>2748</td>\n",
       "      <td>52338</td>\n",
       "      <td>49590</td>\n",
       "      <td>0.274087</td>\n",
       "      <td>0.354280</td>\n",
       "      <td>0.573037</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149998</th>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>30</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>5716.0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>(25.0, 40.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-0.001, 0.0192]</td>\n",
       "      <td>(-0.001, 0.134]</td>\n",
       "      <td>(5400.0, 8250.0]</td>\n",
       "      <td>(-0.001, 4.0]</td>\n",
       "      <td>(-inf, 0.0]</td>\n",
       "      <td>3296</td>\n",
       "      <td>32069</td>\n",
       "      <td>28773</td>\n",
       "      <td>0.328745</td>\n",
       "      <td>0.205560</td>\n",
       "      <td>0.955231</td>\n",
       "      <td>8793</td>\n",
       "      <td>136664</td>\n",
       "      <td>127871</td>\n",
       "      <td>0.87702</td>\n",
       "      <td>0.913534</td>\n",
       "      <td>0.67296</td>\n",
       "      <td>7450</td>\n",
       "      <td>142051</td>\n",
       "      <td>134601</td>\n",
       "      <td>0.743068</td>\n",
       "      <td>0.961614</td>\n",
       "      <td>0.572521</td>\n",
       "      <td>9033</td>\n",
       "      <td>148127</td>\n",
       "      <td>139094</td>\n",
       "      <td>0.900958</td>\n",
       "      <td>0.993713</td>\n",
       "      <td>0.645352</td>\n",
       "      <td>8319</td>\n",
       "      <td>146905</td>\n",
       "      <td>138586</td>\n",
       "      <td>0.829743</td>\n",
       "      <td>0.990084</td>\n",
       "      <td>0.608707</td>\n",
       "      <td>582</td>\n",
       "      <td>30000</td>\n",
       "      <td>29418</td>\n",
       "      <td>0.058049</td>\n",
       "      <td>0.210168</td>\n",
       "      <td>0.243890</td>\n",
       "      <td>1830</td>\n",
       "      <td>30000</td>\n",
       "      <td>28170</td>\n",
       "      <td>0.182525</td>\n",
       "      <td>0.201252</td>\n",
       "      <td>0.645506</td>\n",
       "      <td>1819</td>\n",
       "      <td>29961</td>\n",
       "      <td>28142</td>\n",
       "      <td>0.181428</td>\n",
       "      <td>0.201052</td>\n",
       "      <td>0.643114</td>\n",
       "      <td>3103</td>\n",
       "      <td>33659</td>\n",
       "      <td>30556</td>\n",
       "      <td>0.309495</td>\n",
       "      <td>0.218298</td>\n",
       "      <td>0.882845</td>\n",
       "      <td>4672</td>\n",
       "      <td>56188</td>\n",
       "      <td>51516</td>\n",
       "      <td>0.465988</td>\n",
       "      <td>0.368040</td>\n",
       "      <td>0.818076</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149999</th>\n",
       "      <td>0</td>\n",
       "      <td>0.850283</td>\n",
       "      <td>64</td>\n",
       "      <td>0</td>\n",
       "      <td>0.249908</td>\n",
       "      <td>8158.0</td>\n",
       "      <td>8</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>(60.0, 70.0]</td>\n",
       "      <td>(-inf, 2.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>(0.699, 50708.0]</td>\n",
       "      <td>(0.134, 0.287]</td>\n",
       "      <td>(5400.0, 8250.0]</td>\n",
       "      <td>(6.0, 9.0]</td>\n",
       "      <td>(1.0, 2.0]</td>\n",
       "      <td>952</td>\n",
       "      <td>27424</td>\n",
       "      <td>26472</td>\n",
       "      <td>0.094953</td>\n",
       "      <td>0.189121</td>\n",
       "      <td>0.406848</td>\n",
       "      <td>8793</td>\n",
       "      <td>136664</td>\n",
       "      <td>127871</td>\n",
       "      <td>0.87702</td>\n",
       "      <td>0.913534</td>\n",
       "      <td>0.67296</td>\n",
       "      <td>7450</td>\n",
       "      <td>142051</td>\n",
       "      <td>134601</td>\n",
       "      <td>0.743068</td>\n",
       "      <td>0.961614</td>\n",
       "      <td>0.572521</td>\n",
       "      <td>9033</td>\n",
       "      <td>148127</td>\n",
       "      <td>139094</td>\n",
       "      <td>0.900958</td>\n",
       "      <td>0.993713</td>\n",
       "      <td>0.645352</td>\n",
       "      <td>8319</td>\n",
       "      <td>146905</td>\n",
       "      <td>138586</td>\n",
       "      <td>0.829743</td>\n",
       "      <td>0.990084</td>\n",
       "      <td>0.608707</td>\n",
       "      <td>5963</td>\n",
       "      <td>30000</td>\n",
       "      <td>24037</td>\n",
       "      <td>0.594754</td>\n",
       "      <td>0.171725</td>\n",
       "      <td>1.495914</td>\n",
       "      <td>1716</td>\n",
       "      <td>30000</td>\n",
       "      <td>28284</td>\n",
       "      <td>0.171155</td>\n",
       "      <td>0.202066</td>\n",
       "      <td>0.613576</td>\n",
       "      <td>1819</td>\n",
       "      <td>29961</td>\n",
       "      <td>28142</td>\n",
       "      <td>0.181428</td>\n",
       "      <td>0.201052</td>\n",
       "      <td>0.643114</td>\n",
       "      <td>2017</td>\n",
       "      <td>37162</td>\n",
       "      <td>35145</td>\n",
       "      <td>0.201177</td>\n",
       "      <td>0.251082</td>\n",
       "      <td>0.588475</td>\n",
       "      <td>1765</td>\n",
       "      <td>31522</td>\n",
       "      <td>29757</td>\n",
       "      <td>0.176042</td>\n",
       "      <td>0.212589</td>\n",
       "      <td>0.603269</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>150000 rows × 81 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        SeriousDlqin2yrs  RevolvingUtilizationOfUnsecuredLines  age  NumberOfTime30-59DaysPastDueNotWorse    DebtRatio  MonthlyIncome  NumberOfOpenCreditLinesAndLoans  NumberOfTimes90DaysLate  NumberRealEstateLoansOrLines  NumberOfTime60-89DaysPastDueNotWorse  NumberOfDependents       bin_age bin_NumberOfDependents bin_NumberOfTime30-59DaysPastDueNotWorse bin_NumberOfTime60-89DaysPastDueNotWorse bin_NumberOfTimes90DaysLate bin_RevolvingUtilizationOfUnsecuredLines    bin_DebtRatio    bin_MonthlyIncome bin_NumberOfOpenCreditLinesAndLoans bin_NumberRealEstateLoansOrLines  bad_bin_age  all_bin_age  good_bin_age  Margin Bad_bin_age  Margin Good_bin_age  woe_bin_age  bad_bin_NumberOfDependents  all_bin_NumberOfDependents  good_bin_NumberOfDependents  Margin Bad_bin_NumberOfDependents  Margin Good_bin_NumberOfDependents  woe_bin_NumberOfDependents  bad_bin_NumberOfTime30-59DaysPastDueNotWorse  all_bin_NumberOfTime30-59DaysPastDueNotWorse  good_bin_NumberOfTime30-59DaysPastDueNotWorse  Margin Bad_bin_NumberOfTime30-59DaysPastDueNotWorse  Margin Good_bin_NumberOfTime30-59DaysPastDueNotWorse  woe_bin_NumberOfTime30-59DaysPastDueNotWorse  bad_bin_NumberOfTime60-89DaysPastDueNotWorse  all_bin_NumberOfTime60-89DaysPastDueNotWorse  good_bin_NumberOfTime60-89DaysPastDueNotWorse  Margin Bad_bin_NumberOfTime60-89DaysPastDueNotWorse  Margin Good_bin_NumberOfTime60-89DaysPastDueNotWorse  woe_bin_NumberOfTime60-89DaysPastDueNotWorse  bad_bin_NumberOfTimes90DaysLate  all_bin_NumberOfTimes90DaysLate  good_bin_NumberOfTimes90DaysLate  Margin Bad_bin_NumberOfTimes90DaysLate  Margin Good_bin_NumberOfTimes90DaysLate  woe_bin_NumberOfTimes90DaysLate  bad_bin_RevolvingUtilizationOfUnsecuredLines  all_bin_RevolvingUtilizationOfUnsecuredLines  good_bin_RevolvingUtilizationOfUnsecuredLines  Margin Bad_bin_RevolvingUtilizationOfUnsecuredLines  Margin Good_bin_RevolvingUtilizationOfUnsecuredLines  woe_bin_RevolvingUtilizationOfUnsecuredLines  bad_bin_DebtRatio  all_bin_DebtRatio  
good_bin_DebtRatio  Margin Bad_bin_DebtRatio  Margin Good_bin_DebtRatio  woe_bin_DebtRatio  bad_bin_MonthlyIncome  all_bin_MonthlyIncome  good_bin_MonthlyIncome  Margin Bad_bin_MonthlyIncome  Margin Good_bin_MonthlyIncome  woe_bin_MonthlyIncome  bad_bin_NumberOfOpenCreditLinesAndLoans  all_bin_NumberOfOpenCreditLinesAndLoans  good_bin_NumberOfOpenCreditLinesAndLoans  Margin Bad_bin_NumberOfOpenCreditLinesAndLoans  Margin Good_bin_NumberOfOpenCreditLinesAndLoans  woe_bin_NumberOfOpenCreditLinesAndLoans  bad_bin_NumberRealEstateLoansOrLines  all_bin_NumberRealEstateLoansOrLines  good_bin_NumberRealEstateLoansOrLines  Margin Bad_bin_NumberRealEstateLoansOrLines  Margin Good_bin_NumberRealEstateLoansOrLines  woe_bin_NumberRealEstateLoansOrLines\n",
       "0                      1                              0.766127   45                                     2     0.802982         9120.0                               13                        0                             6                                     0                 2.0  (40.0, 50.0]            (-inf, 2.0]                               (1.0, 2.0]                              (-inf, 1.0]                 (-inf, 1.0]                         (0.699, 50708.0]     (0.468, 4.0]  (8250.0, 3008750.0]                        (12.0, 58.0]                       (3.0, inf]         2893        35037         32144            0.288550             0.229643     0.813822                        8793                      136664                       127871                            0.87702                            0.913534                     0.67296                                          1219                                          4598                                           3379                                           0.121584                                             0.024140                                         1.797837                                          9033                                        148127                                         139094                                           0.900958                                             0.993713                                         0.645352                             8319                           146905                            138586                                0.829743                                 0.990084                         0.608707                                          5963                                         30000                                          24037                                           0.594754                                             0.171725                                         1.495914               2974              30109            
   27135                  0.296629                   0.193857           0.928274                   1387                  29993                   28606                      0.138340                       0.204367               0.516960                                     1846                                    27684                                     25838                                        0.184121                                         0.184591                                 0.691873                                   419                                  3652                                   3233                                     0.041791                                      0.023097                              1.032961\n",
       "1                      0                              0.957151   40                                     0     0.121876         2600.0                                4                        0                             0                                     0                 1.0  (25.0, 40.0]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                         (0.699, 50708.0]  (-0.001, 0.134]     (-0.001, 3400.0]                       (-0.001, 4.0]                      (-inf, 0.0]         3296        32069         28773            0.328745             0.205560     0.955231                        8793                      136664                       127871                            0.87702                            0.913534                     0.67296                                          7450                                        142051                                         134601                                           0.743068                                             0.961614                                         0.572521                                          9033                                        148127                                         139094                                           0.900958                                             0.993713                                         0.645352                             8319                           146905                            138586                                0.829743                                 0.990084                         0.608707                                          5963                                         30000                                          24037                                           0.594754                                             0.171725                                         1.495914               1830              30000            
   28170                  0.182525                   0.201252           0.645506                   2789                  30289                   27500                      0.278177                       0.196465               0.882076                                     3103                                    33659                                     30556                                        0.309495                                         0.218298                                 0.882845                                  4672                                 56188                                  51516                                     0.465988                                      0.368040                              0.818076\n",
       "2                      0                              0.658180   38                                     1     0.085113         3042.0                                2                        1                             0                                     0                 0.0  (25.0, 40.0]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                           (0.271, 0.699]  (-0.001, 0.134]     (-0.001, 3400.0]                       (-0.001, 4.0]                      (-inf, 0.0]         3296        32069         28773            0.328745             0.205560     0.955231                        8793                      136664                       127871                            0.87702                            0.913534                     0.67296                                          7450                                        142051                                         134601                                           0.743068                                             0.961614                                         0.572521                                          9033                                        148127                                         139094                                           0.900958                                             0.993713                                         0.645352                             8319                           146905                            138586                                0.829743                                 0.990084                         0.608707                                          2107                                         30000                                          27893                                           0.210154                                             0.199273                                         0.720083               1830              30000            
   28170                  0.182525                   0.201252           0.645506                   2789                  30289                   27500                      0.278177                       0.196465               0.882076                                     3103                                    33659                                     30556                                        0.309495                                         0.218298                                 0.882845                                  4672                                 56188                                  51516                                     0.465988                                      0.368040                              0.818076\n",
       "3                      0                              0.233810   30                                     0     0.036050         3300.0                                5                        0                             0                                     0                 0.0  (25.0, 40.0]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                          (0.0832, 0.271]  (-0.001, 0.134]     (-0.001, 3400.0]                          (4.0, 6.0]                      (-inf, 0.0]         3296        32069         28773            0.328745             0.205560     0.955231                        8793                      136664                       127871                            0.87702                            0.913534                     0.67296                                          7450                                        142051                                         134601                                           0.743068                                             0.961614                                         0.572521                                          9033                                        148127                                         139094                                           0.900958                                             0.993713                                         0.645352                             8319                           146905                            138586                                0.829743                                 0.990084                         0.608707                                           877                                         30000                                          29123                                           0.087473                                             0.208060                                         0.350952               1830              30000            
   28170                  0.182525                   0.201252           0.645506                   2789                  30289                   27500                      0.278177                       0.196465               0.882076                                     1573                                    26545                                     24972                                        0.156892                                         0.178405                                 0.630962                                  4672                                 56188                                  51516                                     0.465988                                      0.368040                              0.818076\n",
       "4                      0                              0.907239   49                                     1     0.024926        63588.0                                7                        0                             1                                     0                 0.0  (40.0, 50.0]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                         (0.699, 50708.0]  (-0.001, 0.134]  (8250.0, 3008750.0]                          (6.0, 9.0]                       (0.0, 1.0]         2893        35037         32144            0.288550             0.229643     0.813822                        8793                      136664                       127871                            0.87702                            0.913534                     0.67296                                          7450                                        142051                                         134601                                           0.743068                                             0.961614                                         0.572521                                          9033                                        148127                                         139094                                           0.900958                                             0.993713                                         0.645352                             8319                           146905                            138586                                0.829743                                 0.990084                         0.608707                                          5963                                         30000                                          24037                                           0.594754                                             0.171725                                         1.495914               1830              30000            
   28170                  0.182525                   0.201252           0.645506                   1387                  29993                   28606                      0.138340                       0.204367               0.516960                                     2017                                    37162                                     35145                                        0.201177                                         0.251082                                 0.588475                                  2748                                 52338                                  49590                                     0.274087                                      0.354280                              0.573037\n",
       "...                  ...                                   ...  ...                                   ...          ...            ...                              ...                      ...                           ...                                   ...                 ...           ...                    ...                                      ...                                      ...                         ...                                      ...              ...                  ...                                 ...                              ...          ...          ...           ...                 ...                  ...          ...                         ...                         ...                          ...                                ...                                 ...                         ...                                           ...                                           ...                                            ...                                                ...                                                  ...                                              ...                                           ...                                           ...                                            ...                                                ...                                                  ...                                              ...                              ...                              ...                               ...                                     ...                                      ...                              ...                                           ...                                           ...                                            ...                                                ...                                                  ...                                              ...                ...                ...            
     ...                       ...                        ...                ...                    ...                    ...                     ...                           ...                            ...                    ...                                      ...                                      ...                                       ...                                             ...                                              ...                                      ...                                   ...                                   ...                                    ...                                          ...                                           ...                                   ...\n",
       "149995                 0                              0.040674   74                                     0     0.225131         2100.0                                4                        0                             1                                     0                 0.0   (70.0, inf]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                         (0.0192, 0.0832]   (0.134, 0.287]     (-0.001, 3400.0]                       (-0.001, 4.0]                       (0.0, 1.0]          398        17636         17238            0.039697             0.123151     0.279404                        8793                      136664                       127871                            0.87702                            0.913534                     0.67296                                          7450                                        142051                                         134601                                           0.743068                                             0.961614                                         0.572521                                          9033                                        148127                                         139094                                           0.900958                                             0.993713                                         0.645352                             8319                           146905                            138586                                0.829743                                 0.990084                         0.608707                                           497                                         30000                                          29503                                           0.049571                                             0.210775                                         0.211221               1716              30000            
   28284                  0.171155                   0.202066           0.613576                   2789                  30289                   27500                      0.278177                       0.196465               0.882076                                     3103                                    33659                                     30556                                        0.309495                                         0.218298                                 0.882845                                  2748                                 52338                                  49590                                     0.274087                                      0.354280                              0.573037\n",
       "149996                 0                              0.299745   44                                     0     0.716562         5584.0                                4                        0                             1                                     0                 2.0  (40.0, 50.0]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                           (0.271, 0.699]     (0.468, 4.0]     (5400.0, 8250.0]                       (-0.001, 4.0]                       (0.0, 1.0]         2893        35037         32144            0.288550             0.229643     0.813822                        8793                      136664                       127871                            0.87702                            0.913534                     0.67296                                          7450                                        142051                                         134601                                           0.743068                                             0.961614                                         0.572521                                          9033                                        148127                                         139094                                           0.900958                                             0.993713                                         0.645352                             8319                           146905                            138586                                0.829743                                 0.990084                         0.608707                                          2107                                         30000                                          27893                                           0.210154                                             0.199273                                         0.720083               2974              30109            
   27135                  0.296629                   0.193857           0.928274                   1819                  29961                   28142                      0.181428                       0.201052               0.643114                                     3103                                    33659                                     30556                                        0.309495                                         0.218298                                 0.882845                                  2748                                 52338                                  49590                                     0.274087                                      0.354280                              0.573037\n",
       "149997                 0                              0.246044   58                                     0  3870.000000         5400.0                               18                        0                             1                                     0                 0.0  (50.0, 60.0]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                          (0.0832, 0.271]  (4.0, 329664.0]     (3400.0, 5400.0]                        (12.0, 58.0]                       (0.0, 1.0]         2149        34806         32657            0.214343             0.233308     0.651655                        8793                      136664                       127871                            0.87702                            0.913534                     0.67296                                          7450                                        142051                                         134601                                           0.743068                                             0.961614                                         0.572521                                          9033                                        148127                                         139094                                           0.900958                                             0.993713                                         0.645352                             8319                           146905                            138586                                0.829743                                 0.990084                         0.608707                                           877                                         30000                                          29123                                           0.087473                                             0.208060                                         0.350952               1653              29891            
   28238                  0.164871                   0.201737           0.597328                   4031                  59757                   55726                      0.402055                       0.398117               0.698081                                     1846                                    27684                                     25838                                        0.184121                                         0.184591                                 0.691873                                  2748                                 52338                                  49590                                     0.274087                                      0.354280                              0.573037\n",
       "149998                 0                              0.000000   30                                     0     0.000000         5716.0                                4                        0                             0                                     0                 0.0  (25.0, 40.0]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                         (-0.001, 0.0192]  (-0.001, 0.134]     (5400.0, 8250.0]                       (-0.001, 4.0]                      (-inf, 0.0]         3296        32069         28773            0.328745             0.205560     0.955231                        8793                      136664                       127871                            0.87702                            0.913534                     0.67296                                          7450                                        142051                                         134601                                           0.743068                                             0.961614                                         0.572521                                          9033                                        148127                                         139094                                           0.900958                                             0.993713                                         0.645352                             8319                           146905                            138586                                0.829743                                 0.990084                         0.608707                                           582                                         30000                                          29418                                           0.058049                                             0.210168                                         0.243890               1830              30000            
   28170                  0.182525                   0.201252           0.645506                   1819                  29961                   28142                      0.181428                       0.201052               0.643114                                     3103                                    33659                                     30556                                        0.309495                                         0.218298                                 0.882845                                  4672                                 56188                                  51516                                     0.465988                                      0.368040                              0.818076\n",
       "149999                 0                              0.850283   64                                     0     0.249908         8158.0                                8                        0                             2                                     0                 0.0  (60.0, 70.0]            (-inf, 2.0]                              (-inf, 1.0]                              (-inf, 1.0]                 (-inf, 1.0]                         (0.699, 50708.0]   (0.134, 0.287]     (5400.0, 8250.0]                          (6.0, 9.0]                       (1.0, 2.0]          952        27424         26472            0.094953             0.189121     0.406848                        8793                      136664                       127871                            0.87702                            0.913534                     0.67296                                          7450                                        142051                                         134601                                           0.743068                                             0.961614                                         0.572521                                          9033                                        148127                                         139094                                           0.900958                                             0.993713                                         0.645352                             8319                           146905                            138586                                0.829743                                 0.990084                         0.608707                                          5963                                         30000                                          24037                                           0.594754                                             0.171725                                         1.495914               1716              30000            
   28284                  0.171155                   0.202066           0.613576                   1819                  29961                   28142                      0.181428                       0.201052               0.643114                                     2017                                    37162                                     35145                                        0.201177                                         0.251082                                 0.588475                                  1765                                 31522                                  29757                                     0.176042                                      0.212589                              0.603269\n",
       "\n",
       "[150000 rows x 81 columns]"
      ]
     },
     "execution_count": 89,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 计算这些特征的woe# 计算这些特征 \n",
    "def cal_WOE(df, features, target):\n",
    "    df_new = df.copy()\n",
    "    for f in features:\n",
    "        df_woe = df_new.groupby(f).agg({target:['sum', 'count']})\n",
    "        df_woe.columns = list(map(''.join, df_woe.columns.values))\n",
    "        df_woe = df_woe.reset_index()#重新再编码一下\n",
    "        df_woe = df_woe.rename(columns={target+'sum':'bad', target+'count':'all'})\n",
    "        # 这里是下面是计算WOE和IV的过程\n",
    "        df_woe['good'] = df_woe['all'] - df_woe['bad']\n",
    "        df_woe['Margin Bad'] = df_woe['bad'] / df_woe['bad'].sum() \n",
    "        df_woe['Margin Good'] = df_woe['good'] / df_woe['good'].sum()\n",
    "        df_woe['woe'] = np.log1p(df_woe['Margin Bad'] / df_woe['Margin Good'])\n",
    "        # 避免重名  df_woe每次循环一个特征都有一个bad和all  按照名字下划线添加到df_woe里去 \n",
    "        df_woe.columns = [c if c==f else c+'_'+f for c in list(df_woe.columns.values)]#先更名名称 再按照特赠那个字段左连接\n",
    "        df_new = df_new.merge(df_woe, on=f, how='left')\n",
    "    return df_new\n",
    "# 计算这些特征的WOE\n",
    "df_woe = cal_WOE(df_train, bin_cols, 'SeriousDlqin2yrs')\n",
    "df_woe# bin_cols里面有10个特征  每个特征都增加了 bad all good MarginBad MarginGood woe  所以要增加60个字段"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['NumberOfTime30-59DaysPastDueNotWorse',\n",
       " 'NumberOfTime60-89DaysPastDueNotWorse',\n",
       " 'NumberOfTimes90DaysLate',\n",
       " 'RevolvingUtilizationOfUnsecuredLines',\n",
       " 'age']"
      ]
     },
     "execution_count": 90,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#只筛选那5个我们要的规则的WOE\n",
    "feature_cols"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>features</th>\n",
       "      <th>bin</th>\n",
       "      <th>woe</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NumberOfTime30-59DaysPastDueNotWorse</td>\n",
       "      <td>(1.0, 2.0]</td>\n",
       "      <td>1.797837</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>NumberOfTime30-59DaysPastDueNotWorse</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>0.572521</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>NumberOfTime30-59DaysPastDueNotWorse</td>\n",
       "      <td>(2.0, 3.0]</td>\n",
       "      <td>2.151185</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>183</th>\n",
       "      <td>NumberOfTime30-59DaysPastDueNotWorse</td>\n",
       "      <td>(3.0, 4.0]</td>\n",
       "      <td>2.429111</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>191</th>\n",
       "      <td>NumberOfTime30-59DaysPastDueNotWorse</td>\n",
       "      <td>(4.0, 5.0]</td>\n",
       "      <td>2.520613</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>251</th>\n",
       "      <td>NumberOfTime30-59DaysPastDueNotWorse</td>\n",
       "      <td>(6.0, 7.0]</td>\n",
       "      <td>2.774776</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>423</th>\n",
       "      <td>NumberOfTime30-59DaysPastDueNotWorse</td>\n",
       "      <td>(9.0, inf]</td>\n",
       "      <td>2.902860</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1052</th>\n",
       "      <td>NumberOfTime30-59DaysPastDueNotWorse</td>\n",
       "      <td>(5.0, 6.0]</td>\n",
       "      <td>2.812612</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6909</th>\n",
       "      <td>NumberOfTime30-59DaysPastDueNotWorse</td>\n",
       "      <td>(7.0, 8.0]</td>\n",
       "      <td>2.024184</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10822</th>\n",
       "      <td>NumberOfTime30-59DaysPastDueNotWorse</td>\n",
       "      <td>(8.0, 9.0]</td>\n",
       "      <td>2.077007</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NumberOfTime60-89DaysPastDueNotWorse</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>0.645352</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>186</th>\n",
       "      <td>NumberOfTime60-89DaysPastDueNotWorse</td>\n",
       "      <td>(1.0, 2.0]</td>\n",
       "      <td>2.712133</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>423</th>\n",
       "      <td>NumberOfTime60-89DaysPastDueNotWorse</td>\n",
       "      <td>(4.0, 5.0]</td>\n",
       "      <td>3.159234</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1146</th>\n",
       "      <td>NumberOfTime60-89DaysPastDueNotWorse</td>\n",
       "      <td>(2.0, 3.0]</td>\n",
       "      <td>2.955438</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1733</th>\n",
       "      <td>NumberOfTime60-89DaysPastDueNotWorse</td>\n",
       "      <td>(9.0, inf]</td>\n",
       "      <td>2.886833</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2406</th>\n",
       "      <td>NumberOfTime60-89DaysPastDueNotWorse</td>\n",
       "      <td>(3.0, 4.0]</td>\n",
       "      <td>3.164917</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6664</th>\n",
       "      <td>NumberOfTime60-89DaysPastDueNotWorse</td>\n",
       "      <td>(5.0, 6.0]</td>\n",
       "      <td>3.758483</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16642</th>\n",
       "      <td>NumberOfTime60-89DaysPastDueNotWorse</td>\n",
       "      <td>(6.0, 7.0]</td>\n",
       "      <td>2.915139</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23964</th>\n",
       "      <td>NumberOfTime60-89DaysPastDueNotWorse</td>\n",
       "      <td>(7.0, 8.0]</td>\n",
       "      <td>2.705454</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>68976</th>\n",
       "      <td>NumberOfTime60-89DaysPastDueNotWorse</td>\n",
       "      <td>(8.0, 9.0]</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NumberOfTimes90DaysLate</td>\n",
       "      <td>(-inf, 1.0]</td>\n",
       "      <td>0.608707</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>NumberOfTimes90DaysLate</td>\n",
       "      <td>(2.0, 3.0]</td>\n",
       "      <td>2.998746</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>186</th>\n",
       "      <td>NumberOfTimes90DaysLate</td>\n",
       "      <td>(1.0, 2.0]</td>\n",
       "      <td>2.701853</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1298</th>\n",
       "      <td>NumberOfTimes90DaysLate</td>\n",
       "      <td>(4.0, 5.0]</td>\n",
       "      <td>3.224503</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1713</th>\n",
       "      <td>NumberOfTimes90DaysLate</td>\n",
       "      <td>(3.0, 4.0]</td>\n",
       "      <td>3.379582</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1733</th>\n",
       "      <td>NumberOfTimes90DaysLate</td>\n",
       "      <td>(9.0, inf]</td>\n",
       "      <td>2.878935</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2910</th>\n",
       "      <td>NumberOfTimes90DaysLate</td>\n",
       "      <td>(8.0, 9.0]</td>\n",
       "      <td>3.691154</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3400</th>\n",
       "      <td>NumberOfTimes90DaysLate</td>\n",
       "      <td>(5.0, 6.0]</td>\n",
       "      <td>3.088387</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3929</th>\n",
       "      <td>NumberOfTimes90DaysLate</td>\n",
       "      <td>(6.0, 7.0]</td>\n",
       "      <td>4.140397</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5684</th>\n",
       "      <td>NumberOfTimes90DaysLate</td>\n",
       "      <td>(7.0, 8.0]</td>\n",
       "      <td>3.580814</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>RevolvingUtilizationOfUnsecuredLines</td>\n",
       "      <td>(0.699, 50708.0]</td>\n",
       "      <td>1.495914</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>RevolvingUtilizationOfUnsecuredLines</td>\n",
       "      <td>(0.271, 0.699]</td>\n",
       "      <td>0.720083</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>RevolvingUtilizationOfUnsecuredLines</td>\n",
       "      <td>(0.0832, 0.271]</td>\n",
       "      <td>0.350952</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>RevolvingUtilizationOfUnsecuredLines</td>\n",
       "      <td>(-0.001, 0.0192]</td>\n",
       "      <td>0.243890</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>RevolvingUtilizationOfUnsecuredLines</td>\n",
       "      <td>(0.0192, 0.0832]</td>\n",
       "      <td>0.211221</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>age</td>\n",
       "      <td>(40.0, 50.0]</td>\n",
       "      <td>0.813822</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>age</td>\n",
       "      <td>(25.0, 40.0]</td>\n",
       "      <td>0.955231</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>age</td>\n",
       "      <td>(70.0, inf]</td>\n",
       "      <td>0.279404</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>age</td>\n",
       "      <td>(50.0, 60.0]</td>\n",
       "      <td>0.651655</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>age</td>\n",
       "      <td>(60.0, 70.0]</td>\n",
       "      <td>0.406848</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>age</td>\n",
       "      <td>(-inf, 25.0]</td>\n",
       "      <td>1.013134</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                   features               bin       woe\n",
       "0      NumberOfTime30-59DaysPastDueNotWorse        (1.0, 2.0]  1.797837\n",
       "1      NumberOfTime30-59DaysPastDueNotWorse       (-inf, 1.0]  0.572521\n",
       "13     NumberOfTime30-59DaysPastDueNotWorse        (2.0, 3.0]  2.151185\n",
       "183    NumberOfTime30-59DaysPastDueNotWorse        (3.0, 4.0]  2.429111\n",
       "191    NumberOfTime30-59DaysPastDueNotWorse        (4.0, 5.0]  2.520613\n",
       "251    NumberOfTime30-59DaysPastDueNotWorse        (6.0, 7.0]  2.774776\n",
       "423    NumberOfTime30-59DaysPastDueNotWorse        (9.0, inf]  2.902860\n",
       "1052   NumberOfTime30-59DaysPastDueNotWorse        (5.0, 6.0]  2.812612\n",
       "6909   NumberOfTime30-59DaysPastDueNotWorse        (7.0, 8.0]  2.024184\n",
       "10822  NumberOfTime30-59DaysPastDueNotWorse        (8.0, 9.0]  2.077007\n",
       "0      NumberOfTime60-89DaysPastDueNotWorse       (-inf, 1.0]  0.645352\n",
       "186    NumberOfTime60-89DaysPastDueNotWorse        (1.0, 2.0]  2.712133\n",
       "423    NumberOfTime60-89DaysPastDueNotWorse        (4.0, 5.0]  3.159234\n",
       "1146   NumberOfTime60-89DaysPastDueNotWorse        (2.0, 3.0]  2.955438\n",
       "1733   NumberOfTime60-89DaysPastDueNotWorse        (9.0, inf]  2.886833\n",
       "2406   NumberOfTime60-89DaysPastDueNotWorse        (3.0, 4.0]  3.164917\n",
       "6664   NumberOfTime60-89DaysPastDueNotWorse        (5.0, 6.0]  3.758483\n",
       "16642  NumberOfTime60-89DaysPastDueNotWorse        (6.0, 7.0]  2.915139\n",
       "23964  NumberOfTime60-89DaysPastDueNotWorse        (7.0, 8.0]  2.705454\n",
       "68976  NumberOfTime60-89DaysPastDueNotWorse        (8.0, 9.0]  0.000000\n",
       "0                   NumberOfTimes90DaysLate       (-inf, 1.0]  0.608707\n",
       "13                  NumberOfTimes90DaysLate        (2.0, 3.0]  2.998746\n",
       "186                 NumberOfTimes90DaysLate        (1.0, 2.0]  2.701853\n",
       "1298                NumberOfTimes90DaysLate        (4.0, 5.0]  3.224503\n",
       "1713                NumberOfTimes90DaysLate        (3.0, 4.0]  3.379582\n",
       "1733                NumberOfTimes90DaysLate        (9.0, inf]  2.878935\n",
       "2910                NumberOfTimes90DaysLate        (8.0, 9.0]  3.691154\n",
       "3400                NumberOfTimes90DaysLate        (5.0, 6.0]  3.088387\n",
       "3929                NumberOfTimes90DaysLate        (6.0, 7.0]  4.140397\n",
       "5684                NumberOfTimes90DaysLate        (7.0, 8.0]  3.580814\n",
       "0      RevolvingUtilizationOfUnsecuredLines  (0.699, 50708.0]  1.495914\n",
       "2      RevolvingUtilizationOfUnsecuredLines    (0.271, 0.699]  0.720083\n",
       "3      RevolvingUtilizationOfUnsecuredLines   (0.0832, 0.271]  0.350952\n",
       "11     RevolvingUtilizationOfUnsecuredLines  (-0.001, 0.0192]  0.243890\n",
       "14     RevolvingUtilizationOfUnsecuredLines  (0.0192, 0.0832]  0.211221\n",
       "0                                       age      (40.0, 50.0]  0.813822\n",
       "1                                       age      (25.0, 40.0]  0.955231\n",
       "5                                       age       (70.0, inf]  0.279404\n",
       "6                                       age      (50.0, 60.0]  0.651655\n",
       "15                                      age      (60.0, 70.0]  0.406848\n",
       "19                                      age      (-inf, 25.0]  1.013134"
      ]
     },
     "execution_count": 91,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 得到WOE规则 feature, bin, woe\n",
    "df_bin_to_woe = pd.DataFrame(columns=['features', 'bin', 'woe'])\n",
    "for f in feature_cols:\n",
    "    b = 'bin_' + f\n",
    "    w = 'woe_bin_' + f\n",
    "    df = df_woe[[w, b]].drop_duplicates()#通过bin和woe_bin来获取到指定列的元素，drop_duplicates()可以去重 这里按照woe的数值去重\n",
    "    df.columns = ['woe', 'bin']#起个索引的名称\n",
    "    df['features'] = f#再加上一个字段\n",
    "    df_bin_to_woe = pd.concat([df_bin_to_woe, df])#然后再进行拼接\n",
    "df_bin_to_woe"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 6开始用逻辑回归来进行建模"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 6.1筛选特征"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "10\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "['woe_bin_age',\n",
       " 'woe_bin_NumberOfDependents',\n",
       " 'woe_bin_NumberOfTime30-59DaysPastDueNotWorse',\n",
       " 'woe_bin_NumberOfTime60-89DaysPastDueNotWorse',\n",
       " 'woe_bin_NumberOfTimes90DaysLate',\n",
       " 'woe_bin_RevolvingUtilizationOfUnsecuredLines',\n",
       " 'woe_bin_DebtRatio',\n",
       " 'woe_bin_MonthlyIncome',\n",
       " 'woe_bin_NumberOfOpenCreditLinesAndLoans',\n",
       " 'woe_bin_NumberRealEstateLoansOrLines']"
      ]
     },
     "execution_count": 93,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "woe_cols=[c for c in list(df_woe.columns.values) if 'woe' in c ]\n",
    "print(len(woe_cols))\n",
    "woe_cols#这里只选特征里面带有WOE的值就行建模  就是上面算出来的那个10个WOE值"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 6.2数据集切分"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(120000, 10)\n",
      "(30000, 10)\n",
      "(120000,)\n",
      "(30000,)\n"
     ]
    }
   ],
   "source": [
    "#数据集切分\n",
    "from sklearn.model_selection import train_test_split\n",
    "x_train,x_test,y_train,y_test=train_test_split(df_woe[woe_cols],df_woe['SeriousDlqin2yrs'],test_size=0.2,random_state=33)\n",
    "print(x_train.shape)\n",
    "print(x_test.shape)\n",
    "print(y_train.shape)\n",
    "print(y_test.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    120000.000000\n",
       "mean          0.066867\n",
       "std           0.249792\n",
       "min           0.000000\n",
       "25%           0.000000\n",
       "50%           0.000000\n",
       "75%           0.000000\n",
       "max           1.000000\n",
       "Name: SeriousDlqin2yrs, dtype: float64"
      ]
     },
     "execution_count": 95,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_train.describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 6.3模型训练与评估"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.9361\n",
      "0.7711361926102505\n"
     ]
    }
   ],
   "source": [
    "from sklearn.metrics import accuracy_score,roc_auc_score\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "#建好逻辑回归的模型\n",
    "model=LogisticRegression(random_state=33)\n",
    "model.fit(x_train ,y_train )#注意这个地方报了一下错误 nan toolarge  是因为有个地方出现了inf和NAN  解决的方法是上面np.log变成np.log1p\n",
    "y_pred=model.predict(x_test)\n",
    "print(accuracy_score(y_pred,y_test))\n",
    "print(roc_auc_score(y_pred,y_test))#Auc的值有点低  一般大于0.8才可以上线"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
